def search_cocrystals(filter_solvents=True):
    '''
    Search the whole CSD for structures that contain exactly two different
    molecules and pass the quality filters configured below.

    The surviving refcodes are optionally stripped of solvates/hydrates
    (any component whose SMILES is listed in ``clean_smiles.SOLVENT_SMILES``),
    reduced with ``remove_polymorphs`` and written, together with their
    chemical names, to ``new_all_cocrystals.csv``. Rows whose name is missing
    or contains "solvate"/"clathrate" are left out of the CSV only.

    :param filter_solvents: when True, drop structures containing a known
        solvent component before polymorph removal
    :return: the list returned by ``remove_polymorphs`` (not filtered by name)
    '''
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    start_time = time.perf_counter()
    csd = MoleculeReader('CSD')
    entry_reader = EntryReader('CSD')

    settings = search.Search.Settings()
    settings.only_organic = True
    settings.not_polymeric = True
    settings.has_3d_coordinates = True
    settings.no_disorder = True
    settings.no_errors = True
    settings.no_ions = True
    settings.no_metals = True

    pairs = []
    for entry in csd:
        if not settings.test(entry):
            continue
        mol = csd.molecule(entry.identifier)
        mol.normalise_labels()
        smi = mol.smiles
        if smi is None:
            continue
        # We make sure that the structure consists of two different molecules.
        # Remove() is defined elsewhere; presumably it drops duplicates.
        if len(Remove(smi.split('.'))) == 2:
            pairs.append(mol.identifier)

    # clean the list from solvents
    if filter_solvents:
        print('Solvates and hydrates will be removed')
        solvates = set()
        for refcode in pairs:
            mol = csd.molecule(refcode)
            if any(component.smiles in clean_smiles.SOLVENT_SMILES
                   for component in mol.components):
                solvates.add(mol.identifier)
        final_cocrystals = [x for x in pairs if x not in solvates]
    else:
        final_cocrystals = pairs

    # Clean the list from polymorphs
    cocrystals = remove_polymorphs(final_cocrystals)
    end_time = time.perf_counter()

    # Look the names up here rather than inside the solvent branch, so that
    # filter_solvents=False no longer fails on an undefined name dictionary.
    name = [entry_reader.entry(refcode).chemical_name for refcode in cocrystals]

    cocrystals_data = pd.concat(
        [pd.DataFrame(cocrystals, columns=['csd_id']),
         pd.DataFrame(name, columns=['name'])],
        axis=1)
    cocrystals_data = cocrystals_data.dropna(axis=0)
    dataset_cocrystals = cocrystals_data[
        ~cocrystals_data['name'].str.contains("solvate")]
    dataset_cocrystals = dataset_cocrystals[
        ~dataset_cocrystals['name'].str.contains("clathrate")]

    # print the time
    print(end_time - start_time)
    dataset_cocrystals.to_csv('new_all_cocrystals.csv', index=False)
    return cocrystals
Exemple #2
0
def get_entry(identifier, database="CSD"):
    """
    Look up a single entry by its identifier.

    :param identifier: identifier of the wanted entry, as a string
    :param database: source passed to ``EntryReader`` (defaults to "CSD")
    :return: whatever ``EntryReader(database).entry(identifier)`` returns,
        i.e. the ccdc.entry.Entry object
    """
    return EntryReader(database).entry(identifier)
Exemple #3
0
    def __init__(self):
        """Load the two refcode subsets, then run ``get_information`` on every CSD entry."""
        drug_subset_file = "CSD_Drug_Subset_updated.gcd"
        mof_subset_file = "MOF_subset.gcd"

        self.drugs = self.get_refcodes(drug_subset_file)
        self.mofs = self.get_refcodes(mof_subset_file)

        # Four independent, initially empty lists. They are created before the
        # CSD pass below (presumably get_information() fills them - confirm).
        self.subset, self.refcode, self.year, self.smiles = [], [], [], []

        csd_entries = EntryReader('CSD')
        self.data = [self.get_information(entry) for entry in csd_entries]
Exemple #4
0
    def run(self):
        """Rescore docked poses against one hotspot result and write the outcome.

        Steps, in order: pick the hotspot result whose identifier matches
        ``self.args.hotspot_identifier``; rewrite the docked molecules without
        atoms whose symbol is "Unknown" to ``results_no_dummy.mol2``; point
        ``self.args.docked_mols`` at that file; score every entry; deduplicate;
        write ``rescored.csv`` and ``rescored.sdf`` next to the docked file.
        """
        #  inputs
        with HotspotReader(self.args.hotspot_path) as hotspot_reader:
            candidates = [
                result for result in hotspot_reader.read()
                if result.identifier == self.args.hotspot_identifier
            ]
            # IndexError here means no hotspot carried the requested identifier
            hr = candidates[0]

        with MoleculeReader(self.args.docked_mols) as mol_reader:
            docked_dir = os.path.dirname(self.args.docked_mols)
            out = os.path.join(docked_dir, "results_no_dummy.mol2")
            with MoleculeWriter(out) as mol_writer:
                for mol in mol_reader:
                    for atm in mol.atoms:
                        if atm.atomic_symbol == "Unknown":
                            mol.remove_atom(atm)
                    mol_writer.write(mol)

        # From here on the cleaned file is the docking input
        self.args.docked_mols = out
        poses = EntryReader(self.args.docked_mols)

        #  outputs
        out_dir = os.path.dirname(self.args.docked_mols)
        print(out_dir)

        #  process
        hr = augmentation(hr, poses)

        # 1) rescore
        rescored = {}
        for pose in poses:
            rescored[pose] = score(hr, pose)

        def by_score(item):
            return item[1]

        ordered_rescored = OrderedDict(
            sorted(rescored.items(), key=by_score, reverse=True))

        # 2) deduplicate: retain highest ranked pose only
        out_dic = deduplicate(ordered_rescored)

        # 3) output to dataframe ready for ccdc.descriptors API
        identifiers = [pose.identifier for pose in out_dic.keys()]
        scores = list(out_dic.values())
        activities = [activity_tag(pose.identifier) for pose in out_dic.keys()]
        df = pd.DataFrame({
            "identifier": identifiers,
            "score": scores,
            "activity": activities,
        })

        df.to_csv(os.path.join(out_dir, "rescored.csv"))

        with EntryWriter(os.path.join(out_dir, "rescored.sdf")) as entry_writer:
            for pose in out_dic.keys():
                entry_writer.write(pose)
Exemple #5
0
    def gatherMatches(self):
        """
        Build one row of display details for every hit in ``self.searchHits``.

        Each row is a list of
        ``[refcode, 3 cell lengths, 3 cell angles, formula]``. The cell
        lengths and angles are stringified and come from the CSD entry looked
        up by the hit identifier; the formula comes from ``hit.crystal``.

        TODO: create crystal flattener

        :return: list of per-hit detail lists (empty when there are no hits)
        """
        #Refine the hit list here to match group.
        #TMP: right now im just going to return a list of identifiers
        logger.info("Sending request for hits results\n\n")

        allHits = []
        # One reader is shared by all hits instead of re-opening the database
        # for each of them. It is created lazily so that an empty hit list
        # still never touches the CSD.
        csdReader = None

        #Quick + dirty format to just display on other side
        for hit in self.searchHits:
            logger.info(hit.molecule.identifier)
            logger.info("CCDC number: " + str(hit.entry.ccdc_number))

            if csdReader is None:
                csdReader = EntryReader('CSD')
            details = csdReader.entry(hit.identifier)

            # Read the crystal and its cell once rather than once per field.
            crystal = details.crystal
            cellLengths = crystal.cell_lengths
            cellAngles = crystal.cell_angles
            logger.info(str(cellLengths[0]))

            detailsImp = [hit.identifier]  # refcode
            detailsImp.extend(str(cellLengths[i]) for i in range(3))
            detailsImp.extend(str(cellAngles[i]) for i in range(3))
            detailsImp.append(hit.crystal.formula)

            allHits.append(detailsImp)

        return allHits
def reportGenerator(filepath, refcode):
    """
    Render an HTML report for one CSD entry from ``simple_report_template.html``
    (expected next to this module).

    :param filepath: prefix of the output path. ``refcode`` and ``'.html'`` are
        appended to it directly, without a path separator, so pass a directory
        with its trailing separator.
    :param refcode: identifier of the CSD entry to report on
    :return: path to where the generated html report can be found
    """
    # Local import keeps this block self-contained. io.open behaves the same
    # on Python 2 and 3 and always yields/accepts text.
    import io

    entry = EntryReader('csd').entry(refcode)
    mol = entry.molecule
    atoms = mol.atoms
    bonds = mol.bonds
    img = DiagramGenerator().image(mol)
    doi = entry.publication.doi
    if doi is None:
        doi = ' '
    else:
        doi = '<a href="http://dx.doi.org/%s">%s</a>' % (doi, doi)

    template_file_name = os.path.join(
        os.path.dirname(__file__), 'simple_report_template.html'
    )
    # Context manager closes the template handle; explicit UTF-8 replaces the
    # implicit ASCII decoding of the old unicode(...) call.
    with io.open(template_file_name, encoding='utf-8') as template_file:
        template = template_file.read()

    # Format before opening the output, so a template error does not leave an
    # empty report file behind.
    report = template.format(
        entry=entry,
        molecule=mol,
        image=img,
        doi=doi,
        synonyms='; '.join(entry.synonyms),
        counts=dict(
            natoms=len(atoms),
            ndonors=len([a for a in atoms if a.is_donor]),
            nacceptors=len([a for a in atoms if a.is_acceptor]),
            nrot_bonds=len([b for b in bonds if b.is_rotatable]),
        ),
    )

    # NOTE(review): plain concatenation is kept on purpose, callers may rely
    # on the resulting path.
    fileGenPath = filepath + refcode + '.html'
    with io.open(fileGenPath, 'w', encoding='utf-8') as html:
        html.write(report)

    return fileGenPath
    def anal(self, queryTargetPath, normalizeFlag=False):
        """Run the geometry analysis engine on each entry read from queryTargetPath.

        Args:
            queryTargetPath (str): path handed to EntryReader
            normalizeFlag (bool, optional): assign unknown bond types and
                standardise aromatic/delocalised bonds before analysing.
                Defaults to False.

        Returns:
            dict: outlier counts and per-feature lists for the LAST entry read
                (the result is overwritten on every iteration); an empty dict
                when the reader yields nothing.
        """

        def countOutliers(features):
            # A feature counts only when it is unusual AND has enough hits
            return sum(1 for feat in features
                       if feat.unusual and feat.enough_hits)

        result = {}
        for entry in EntryReader(queryTargetPath):
            mol = entry.molecule
            if normalizeFlag:
                mol.assign_bond_types(which="unknown")
                mol.standardise_aromatic_bonds()
                mol.standardise_delocalised_bonds()

            logger.info("begin analysis - for %s", queryTargetPath)
            gam = self.__engine.analyse_molecule(mol)

            nBonds = countOutliers(gam.analysed_bonds)
            nAngles = countOutliers(gam.analysed_angles)
            nTorsions = countOutliers(gam.analysed_torsions)
            nRings = countOutliers(gam.analysed_rings)

            bondList = self.__getBondAnalysis(gam)
            angleList = self.__getAngleAnalysis(gam)
            torsionList = self.__getTorsionAnalysis(gam)
            ringList = self.__getRingAnalysis(gam)

            result = dict(
                bond_outliers=nBonds,
                angle_outliers=nAngles,
                torsion_outliers=nTorsions,
                ring_outliers=nRings,
                bond_list=bondList,
                angle_list=angleList,
                torsion_list=torsionList,
                ring_list=ringList,
            )
        return result
def analyse_structures(user_gcd_input, user_csv_output):
    """Classify the framework dimensionality of every entry in a refcode list.

    For each entry read from ``user_gcd_input`` the heaviest component is
    checked for polymeric bonds. Polymeric frameworks are stripped of
    hydrogens, installed as the crystal's molecule and passed to
    ``dimensionality``. One CSV row ``(refcode, label, running count)`` is
    written and flushed per entry, and a summary is printed at the end.

    :param user_gcd_input: path of the refcode list, opened via
        ``EntryReader(user_gcd_input, 'CSD')``
    :param user_csv_output: output CSV path; ``.csv`` is appended when the
        name has no extension
    """
    # Labels for the values dimensionality() is expected to return.
    dimension_labels = {
        0: '0D non-MOF',
        1: '1D chain',
        2: '2D sheet',
        3: '3D framework',
    }

    if not os.path.splitext(user_csv_output)[1]:
        user_csv_output += ".csv"

    # The with-block closes the file; no explicit close() is needed.
    with open(user_csv_output, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(('Refcode', 'dimensionality', 'number in gcd file'))

        csd_reader = EntryReader(user_gcd_input, 'CSD')

        t2 = time.time()
        n_structures = 0
        n_mof = 0
        n_non_mof = 0

        for entry in csd_reader:
            print('CSD entry: ' + str(entry.identifier))
            n_structures += 1  # quick counter

            count_polymers = sum(
                1 for component in entry.molecule.components
                if component.is_polymeric)
            if count_polymers > 1:
                print('multiple polymer units present')

            framework = entry.molecule.heaviest_component
            if framework.is_polymeric:
                n_mof += 1

                # next steps fail if any atoms in the unit do not have
                # coordinates
                framework.remove_hydrogens()

                entry.crystal.molecule = framework

                fig = dimensionality(entry)
                # An unexpected value used to leave `dimension` unbound (or
                # carrying the previous entry's label); report it instead.
                dimension = dimension_labels.get(
                    fig, 'unrecognised dimensionality value: %r' % (fig,))
            else:
                n_non_mof += 1
                dimension = 'no polymeric bonds detected'

            print('Framework dimensions for CSD entry % s: % s \n' %
                  (entry.identifier, dimension))
            writer.writerow((entry.identifier, dimension, n_structures))
            # Flush per row so partial results survive an interrupted run
            f.flush()

        print('Total MOF subset size is: % d' % n_structures)
        print('Entries recognised as polyermic is: % d' % n_mof)
        print('Entries not recognised as polymeric (and ignored) is: % d' %
              n_non_mof)

        t3 = time.time()
        overall_time_taken = str(t3 - t2)
        print('total time elapsed for script % s' % overall_time_taken)
    def search(self,
               queryTargetId,
               queryTargetPath,
               resultPath,
               normalizeFlag=True,
               maxHits=50,
               searchType="similarity",
               suppressMetals=False):
        """Search the CCDC database for similar or substructure matches for the input query molecule.

        Every structure read from queryTargetPath is searched in turn. For each
        of the first maxHits hits a summary record is collected; substructure
        searches additionally write one mol2 and one sdf file per matched
        component under resultPath/queryTargetId. An index file
        (<queryTargetId>-index.json) is written when the last structure
        searched produced at least one hit.

        Args:
            queryTargetId (str): query identifier
            queryTargetPath (str): path to the query molfile (mol, sdf, mol2)
            resultPath (str): output path to match results
            normalizeFlag (bool, optional): do standard perceptions on matching molecules. Defaults to True.
            maxHits (int, optional): maximum number of matches to return. Defaults to 50.
            searchType (str, optional): search mode (substructure, similarity). Defaults to "similarity".
            suppressMetals (bool, optional): filter structures containing metals. Defaults to False.

        Returns:
            (int): number of matches for the last structure read from
                queryTargetPath (the counter is reset for every structure and
                is not capped by maxHits)
        """

        mU = MarshalUtil()
        logger.info("Start search for target %s path %s result path %s",
                    queryTargetId, queryTargetPath, resultPath)
        #
        summaryList = []
        #
        # A companion <queryTargetId>.cif next to the query file is recorded
        # on each hit when it exists (see setTargetCcPath below).
        targetDirPath = os.path.dirname(queryTargetPath)
        cifTargetPath = os.path.join(targetDirPath, queryTargetId + ".cif")

        #
        targetStructures = EntryReader(queryTargetPath)
        dirPath = os.path.join(resultPath, queryTargetId)
        numHits = 0
        for ii, e in enumerate(targetStructures, 1):
            # Reset per target structure: after the loop numHits describes
            # only the last structure searched.
            numHits = 0
            startTime = time.time()
            targetMol = e.molecule
            if normalizeFlag:
                targetMol.assign_bond_types(which="unknown")
                targetMol.standardise_aromatic_bonds()
                targetMol.standardise_delocalised_bonds()
            #
            logger.info("(%d) begin %s search - query id %s", ii, searchType,
                        queryTargetId)
            if searchType == "similarity":
                hits = self.__similaritySearch(targetMol,
                                               suppressMetals=suppressMetals)
            elif searchType == "substructure":
                hits = self.__moleculeSubstructureSearch(
                    targetMol, suppressMetals=suppressMetals)
            else:
                # Unknown search type: treated as "no matches", not an error
                hits = []
            logger.info("(%d) completed search query id %s in %.3f seconds",
                        ii, queryTargetId,
                        time.time() - startTime)

            if hits:
                # Counts every hit, including those beyond maxHits
                numHits += len(hits)
                logger.info("(%d) search for %s matched %d: %r", ii,
                            queryTargetId, numHits,
                            [targetHit.identifier for targetHit in hits])

                #
                for targetHit in hits[:maxHits]:
                    #
                    hI = CcdcMatchIndexInst()
                    hI.setCsdVersion(csd_version())
                    hI.setCsdDirectory(csd_directory())
                    hI.setTargetId(queryTargetId)
                    hI.setTargetPath(queryTargetPath)
                    if mU.exists(cifTargetPath):
                        hI.setTargetCcPath(cifTargetPath)
                    hI.setIdentifier(targetHit.identifier)
                    hI.setMatchType(searchType)
                    # Best effort: a failure on any entry attribute is logged
                    # and the hit is still recorded with what was set so far.
                    try:
                        hI.setRFactor(targetHit.entry.r_factor)
                        hI.setChemicalName(targetHit.entry.chemical_name)
                        hI.setTemperature(targetHit.entry.temperature)
                        hI.setRadiationSource(targetHit.entry.radiation_source)
                        hI.setHasDisorder("N")
                        cit = targetHit.entry.publication
                        if cit.doi is not None:
                            hI.setCitationDOI(cit.doi)
                        if searchType == "similarity":
                            hI.setSimilarityScore(targetHit.similarity)
                        elif searchType == "substructure":
                            hI.setMatchedAtomLength(
                                len(targetHit.match_atoms()))
                    # NOTE: this `e` rebinds the outer loop variable; that is
                    # harmless because the loop only reads `e` at the top of
                    # each iteration, where it is assigned afresh.
                    except Exception as e:
                        logger.exception("Failing with %s", str(e))
                        #
                    #
                    mU.mkdir(dirPath)
                    mol2L = []
                    if searchType == "substructure":
                        # One mol2 + one sdf file per matched component,
                        # numbered from 001.
                        for jj, mc in enumerate(targetHit.match_components(),
                                                1):
                            fp = os.path.join(
                                dirPath, queryTargetId + "_" +
                                targetHit.identifier + "_%03d" % jj + ".mol2")
                            mol2L.append(fp)
                            with MoleculeWriter(fp) as ofh:
                                ofh.write(mc)
                            # Replace the title line
                            # (every "00" on the second line of the file is
                            # replaced by the hit identifier)
                            with open(fp) as fin:
                                lines = fin.readlines()
                            lines[1] = lines[1].replace(
                                "00", targetHit.identifier)
                            #
                            with open(fp, "w") as fout:
                                fout.write("".join(lines))
                            #
                            fp = os.path.join(
                                dirPath, queryTargetId + "_" +
                                targetHit.identifier + "_%03d" % jj + ".sdf")
                            with MoleculeWriter(fp) as ofh:
                                ofh.write(mc)

                            # Replace the title line
                            # (for the sdf file this is the first line)
                            with open(fp) as fin:
                                lines = fin.readlines()
                            lines[0] = lines[0].replace(
                                "00", targetHit.identifier)
                            #
                            with open(fp, "w") as fout:
                                fout.write("".join(lines))
                        #
                        #  Check for multiple generated result files -
                        #
                        # One summary record per component file; hI is reused,
                        # so each record is deep-copied before being stored.
                        for jj, fp in enumerate(mol2L, 1):
                            logger.debug("(%d) adding component fp %s", jj, fp)
                            hI.setMatchNumber(jj)
                            hI.setMol2Path(fp)
                            # fp ends in ".mol2": dropping 4 characters keeps
                            # the dot, giving the sibling ".sdf" path.
                            tt = fp[:-4] + "sdf"
                            hI.setMolPath(tt)
                            summaryList.append(copy.deepcopy(hI.get()))
                            #
                    else:
                        # Non-substructure searches produce a single record
                        # per hit and no structure files.
                        hI.setMatchNumber(1)
                        summaryList.append(copy.deepcopy(hI.get()))
            else:
                logger.info("(%d) search for %s returns no matches", ii,
                            targetMol.identifier)
                hits = None
        #
        # The index is written only when the LAST structure had hits, although
        # summaryList accumulates records from every structure.
        if numHits > 0:
            mU.mkdir(dirPath)
            fp = os.path.join(dirPath, queryTargetId + "-index.json")
            cmI = CcdcMatchIndex(indexFilePath=fp, verbose=self.__verbose)
            cmI.load(summaryList)
            cmI.writeIndex()

        return numHits
Exemple #10
0
# Refcodes to draw, taken from the ``refcode`` column of ``df``.
entries = list(df.refcode)
from ccdc.search import TextNumericSearch

data = []
# for e in entries:
#     query = TextNumericSearch()
#     query.add_all_identifiers(e)
#     hits = query.search()
#     data.append(hits[0].entry.publication.doi)
# from pprint import pprint
#
# print len(data)
# print len(set(data))
from ccdc.diagram import DiagramGenerator
from ccdc.io import EntryReader

# Shared generator: 500 x 500 images, 12 pt font, 1.6 line width.
diagram_generator = DiagramGenerator()
for option, value in (
        ("font_size", 12),
        ("line_width", 1.6),
        ("image_width", 500),
        ("image_height", 500),
):
    setattr(diagram_generator.settings, option, value)

csd_reader = EntryReader('CSD')
# A set, as before: the numbering of the output files follows set iteration
# order, not the order of ``entries``.
mols = {csd_reader.entry(refcode) for refcode in entries}

for index, entry in enumerate(mols):
    img = diagram_generator.image(entry)
    img.save("hit{}.png".format(index))
Exemple #11
0
class CSD_powder:
    """
    CSD_powder class
    #######################
    this class calculates d_spacing, intensities and two theta for a specific crystal

    Attributes
    -------------

    entry         EntryReader opened on 'CSD'
    crystal_name  str, the name of a crystal of form 'AABHTZ' (None until set)
    wavelength    set by load_d_space() to PowderPattern.Wavelength.Wavelength_CuKa1

    Methods
    ---------------

    __init__(name=None)
    ---------------
    opens the CSD reader and optionally sets the crystal name

    get_crystal_name() / set_crystal_name(value)
    ---------------
    read / change the crystal name

    load_d_space()
    ---------------
    simulates the powder pattern of the named crystal and converts the
    two theta value found for each tick mark to a d_spacing with Bragg's law

    returns d_space[list of d_spacing], intensities[list of peak intensities]

    load_intensities()
    ------------------
    returns the intensity values of the simulated pattern

    load_two_theta()
    -------------------
    returns the two theta values of the simulated pattern

    get_data(option)
    -------------------
    prints pairs of values: option 1 -> d_spacing and peak intensity,
    option 2 -> intensity and two theta; any other option exits with status 1
    """

    def __init__(self, name=None):
        self.entry = EntryReader('CSD')
        # Defined up front so get_crystal_name() works before any set call.
        self.crystal_name = name

    def get_crystal_name(self):
        return self.crystal_name

    def set_crystal_name(self, value):
        self.crystal_name = value

    def _load_pattern(self):
        # Simulated powder pattern of the currently selected crystal.
        crystal = self.entry.crystal(self.crystal_name)
        return PowderPattern.from_crystal(crystal)

    def load_d_space(self):
        # creates a d_space list with intensities as a second option
        pattern = self._load_pattern()
        # NOTE(review): assumes this constant is the numeric wavelength, in
        # the unit wanted for the d_spacings - confirm.
        self.wavelength = PowderPattern.Wavelength.Wavelength_CuKa1
        two_t = pattern.two_theta
        intents = pattern.intensity  # all pattern intensity
        peak_thetas = []
        intensity = []  # final list of intensities
        for tick in pattern.tick_marks:
            tick_two_theta = tick.two_theta
            # Take the first pattern point within 0.01 of the tick mark;
            # this tolerance is an assumption and may be changed.
            for angle, value in zip(two_t, intents):
                if abs(tick_two_theta - angle) < 0.01:
                    peak_thetas.append(angle)
                    intensity.append(value)
                    break
        # two theta in degrees -> theta in radians
        peak_radians = np.array(peak_thetas) / 2 * np.pi / 180
        # Bragg's law: d = wavelength / (2 sin(theta))
        d_space = [self.wavelength / (2 * np.sin(peak))
                   for peak in peak_radians]

        return d_space, intensity

    def load_intensities(self):
        # loads the intensities of a given crystal
        return self._load_pattern().intensity

    def load_two_theta(self):
        return self._load_pattern().two_theta

    def get_data(self, option):
        # elif: option 1 used to fall into the else branch of the second
        # `if` and terminate the process after printing.
        if option == 1:
            first, second = self.load_d_space()
        elif option == 2:
            # all the data
            first = self.load_intensities()
            second = self.load_two_theta()
        else:
            sys.exit(1)
        for i, j in zip(first, second):
            # Same output as the Python 2 statement `print i, j`, but valid
            # on Python 3 as well.
            print("%s %s" % (i, j))
Exemple #12
0
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw

from ccdc.io import EntryReader
import pandas as pd

# Entries returned by the Pharmit query.
mols = EntryReader("./pharmit/query_results.sdf")

# One (smiles, identifier, rmsd) record per entry, gathered in a single pass
# over the reader.
records = [(m.molecule.smiles, m.identifier, m.attributes["rmsd"])
           for m in mols]

smiles = [record[0] for record in records]
name = [record[1] for record in records]
rmsd = [record[2] for record in records]

df = pd.DataFrame({"smiles": smiles, "name": name, "rmsd": rmsd})

df.to_csv("pharmit.csv")

# entries= [(m.smiles, m.identifier) for m in mols]
#
# ligs =[]
#
# for i, entry in enumerate(entries):
#     lig = Chem.MolFromSmiles(entry[0])
#     name = ""
#     for e in entry[1].split(" "):
Exemple #13
0
 def __init__(self):
     """Create an EntryReader on 'CSD' and keep it as ``self.entry``."""
     csd_reader = EntryReader('CSD')
     self.entry = csd_reader
Exemple #14
0
# Read the wanted formulas, one per line. Text mode keeps rstrip('\n') valid
# on Python 3, and the context manager closes the handle.
with open("formulas.txt", 'r') as formulas:
    formulas_list = formulas.readlines()

new_formula_list = []
for formula_string in formulas_list:
    formula_string = formula_string.rstrip('\n')
    fully_split_up = list(split_text(formula_string))
    # Join the tokens pairwise (item 0 + item 1, item 2 + item 3, ...).
    # An odd number of tokens still raises IndexError, as before.
    output_formula_list_item = [
        fully_split_up[index] + fully_split_up[index + 1]
        for index in range(0, len(fully_split_up), 2)
    ]
    output_formula_list_item.sort()
    new_formula_list.append(output_formula_list_item)

# Single-argument print(...) prints the same text on Python 2 and 3.
print(new_formula_list)

# Hashable copies of the wanted formulas, so each component is checked with a
# set lookup instead of a scan over the whole list.
_known_formulas = set(tuple(formula) for formula in new_formula_list)

csd_entry_reader = EntryReader('CSD')
# The context manager guarantees Results.txt is flushed and closed.
with open("Results.txt", 'w') as output:
    for entry in csd_entry_reader:
        for component in entry.molecule.components:
            # str.strip removes any of the given characters from both ends
            # (presumably to unwrap polymeric "(...)n" formulas - confirm).
            entry_formula = component.formula.strip("(")
            entry_formula = entry_formula.strip(")n")
            # Sorted tokens that contain at least one letter
            entry_formula = sorted(
                token for token in entry_formula.split(" ")
                if re.search('[a-zA-Z]', token))
            print(entry.identifier)
            if tuple(entry_formula) in _known_formulas:
                output.write(component.formula + "," + entry.identifier + "\n")
 def __init__(self):
     """Create an EntryReader on 'CSD' and keep it as ``self.entry``."""
     csd_reader = EntryReader('CSD')
     self.entry = csd_reader
Exemple #16
0
 def __init__(self,name):
     """Remember the crystal name and create an EntryReader on 'CSD'."""
     self.crystal_name = name
     self.entry = EntryReader('CSD')
Exemple #17
0
from ioAndInterfaces import ccdcCrystalToASE
from ccdc.io import EntryReader
from ase.io import write as ASEWrite

# Reader opened on the CSD.
csdEntryReader = EntryReader('CSD')

# Fetch the ABEBUF crystal, convert it with ccdcCrystalToASE and write the
# result to temp.xyz.
abebufCrystal = csdEntryReader.crystal('ABEBUF')
aseCrystal = ccdcCrystalToASE(abebufCrystal)
ASEWrite('temp.xyz', aseCrystal)
def create_dataframe(base, run_id, pdbs):
    """Collect the scores of 30 ranked poses per scoring-function run into a DataFrame.

    For every ``pdb`` that has a ``base/pdb/run_id`` directory, each
    sub-directory is treated as one scoring-function run. Poses 1..30 are read
    from ``data/ranked_<pdb>_ligand_m1_<i>.mol2`` in that run and their
    attributes are turned into one row each. Ranks are computed per run with
    ``rank_array`` over the rescore fitness and over the RMSD.

    :param base: root directory holding one directory per pdb
    :param run_id: name of the run directory inside each pdb directory
    :param pdbs: iterable of pdb directory names
    :return: pandas.DataFrame with one row per pose
    """
    # Maps the label returned by process_ff_label() to the spelling used in
    # the pose attribute keys.
    format_dic = {
        "asp": "ASP",
        "chemscore": "Chemscore",
        "goldscore": "Goldscore",
        "plp": "PLP"
    }

    column_names = (
        "pdb", "runid", "pose_id", "pose_rank", "dock_func", "dock_fitness",
        "rescore_func", "rescore_fitness", "gold_score", "rmsd", "rmsd_rank",
    )
    data = {column: [] for column in column_names}

    # Only pdbs that actually have a directory for this run
    available = []
    for pdb in pdbs:
        if os.path.isdir(os.path.join(base, pdb, run_id)):
            available.append(pdb)

    for pdb in tqdm(available):
        dpath = os.path.join(base, pdb, run_id)

        # Every non-file entry of the run directory is one scoring function
        funcs = []
        for item in os.listdir(dpath):
            if not os.path.isfile(os.path.join(dpath, item)):
                funcs.append(item)

        for func in funcs:
            ff_a, ff_b = process_ff_label(func)

            fitness_values = []  # for the ranking
            rmsd_values = []
            for pose_id in range(1, 31):
                mol2_path = os.path.join(
                    dpath, func, "data",
                    f"ranked_{pdb}_ligand_m1_{pose_id}.mol2")
                pose = EntryReader(mol2_path)[0]
                attr = pose.attributes

                # Second line of the "Gold.Score" attribute, first 5 characters
                score = float(attr["Gold.Score"].split("\n")[1][:5])

                # Attributes whose key contains "Fitness", keyed by the second
                # dot-separated part of the key
                fit_score = {}
                for key in attr.keys():
                    if "Fitness" in key:
                        fit_score[key.split(".")[1]] = attr[key]
                rmsd = attr["Gold.Reference.RMSD"]

                data["pdb"].append(pdb)
                data["runid"].append(run_id)
                data["pose_id"].append(pose_id)
                data["dock_func"].append(ff_a)
                data["dock_fitness"].append(float(fit_score[format_dic[ff_a]]))
                data["gold_score"].append(score)
                rmsd_values.append(float(rmsd))

                # Without a rescoring function the docking function is
                # reported as the rescore function too
                rescore_label = ff_a if ff_b is None else ff_b
                data["rescore_func"].append(rescore_label)
                fitness_values.append(
                    float(fit_score[format_dic[rescore_label]]))

            data["rescore_fitness"].extend(fitness_values)
            data["pose_rank"].extend(rank_array(fitness_values))

            data["rmsd"].extend(rmsd_values)
            data["rmsd_rank"].extend(rank_array(rmsd_values))

    return pd.DataFrame(data)
class CSD_powder:
    """
    CSD_powder class
    #######################
    this class calculates d_spacing, intensities and two theta for a specific crystal

    Attributes
    -------------

    entry         EntryReader opened on 'CSD'
    crystal_name  str, the name of a crystal of form 'AABHTZ' (None until set)
    wavelength    set by load_d_space() to PowderPattern.Wavelength.Wavelength_CuKa1

    Methods
    ---------------

    __init__(name=None)
    ---------------
    opens the CSD reader and optionally sets the crystal name

    get_crystal_name() / set_crystal_name(value)
    ---------------
    read / change the crystal name

    load_d_space()
    ---------------
    simulates the powder pattern of the named crystal and converts the
    two theta value found for each tick mark to a d_spacing with Bragg's law

    returns d_space[list of d_spacing], intensities[list of peak intensities]

    load_intensities()
    ------------------
    returns the intensity values of the simulated pattern

    load_two_theta()
    -------------------
    returns the two theta values of the simulated pattern

    get_data(option)
    -------------------
    prints pairs of values: option 1 -> d_spacing and peak intensity,
    option 2 -> intensity and two theta; any other option exits with status 1
    """

    def __init__(self, name=None):
        self.entry = EntryReader('CSD')
        # Defined up front so get_crystal_name() works before any set call.
        self.crystal_name = name

    def get_crystal_name(self):
        return self.crystal_name

    def set_crystal_name(self, value):
        self.crystal_name = value

    def _load_pattern(self):
        # Simulated powder pattern of the currently selected crystal.
        crystal = self.entry.crystal(self.crystal_name)
        return PowderPattern.from_crystal(crystal)

    def load_d_space(self):
        # creates a d_space list with intensities as a second option
        pattern = self._load_pattern()
        # NOTE(review): assumes this constant is the numeric wavelength, in
        # the unit wanted for the d_spacings - confirm.
        self.wavelength = PowderPattern.Wavelength.Wavelength_CuKa1
        two_t = pattern.two_theta
        intents = pattern.intensity  # all pattern intensity
        peak_thetas = []
        intensity = []  # final list of intensities
        for tick in pattern.tick_marks:
            tick_two_theta = tick.two_theta
            # Take the first pattern point within 0.01 of the tick mark;
            # this tolerance is an assumption and may be changed.
            for angle, value in zip(two_t, intents):
                if abs(tick_two_theta - angle) < 0.01:
                    peak_thetas.append(angle)
                    intensity.append(value)
                    break
        # two theta in degrees -> theta in radians
        peak_radians = np.array(peak_thetas) / 2 * np.pi / 180
        # Bragg's law: d = wavelength / (2 sin(theta))
        d_space = [self.wavelength / (2 * np.sin(peak))
                   for peak in peak_radians]

        return d_space, intensity

    def load_intensities(self):
        # loads the intensities of a given crystal
        return self._load_pattern().intensity

    def load_two_theta(self):
        return self._load_pattern().two_theta

    def get_data(self, option):
        # elif: option 1 used to fall into the else branch of the second
        # `if` and terminate the process after printing.
        if option == 1:
            first, second = self.load_d_space()
        elif option == 2:
            # all the data
            first = self.load_intensities()
            second = self.load_two_theta()
        else:
            sys.exit(1)
        for i, j in zip(first, second):
            # Same output as the Python 2 statement `print i, j`, but valid
            # on Python 3 as well.
            print("%s %s" % (i, j))