Ejemplo n.º 1
0
    def _pipe_from_textfile(self, finp):
        """Restore the filter settings from an open text stream.

        Lines are pulled one at a time with ``finp.readline()`` until the
        stream is exhausted or a line starting with ``end`` is reached, so
        nothing past the ``end`` line is consumed. For every line the trailing
        newline and leading whitespace are removed and everything from the
        first ``##`` onwards is discarded. Keywords are matched by prefix and
        case-insensitively:

        * ``formula=<text>`` -- ``<text>`` is parsed, encoded and appended to
          ``self.filter``; other ``key=value`` lines are ignored;
        * ``dict_form`` -- sets ``self.vector_form`` to ``False``;
        * ``end`` -- stops reading. When ``self.vector_form`` is false at that
          point, ``self.required_fields`` becomes ``['Formula']``, every entry
          of ``self.filter`` is decoded in place and ``self.rejected`` is
          rebound to ``self.__rejected_formula``.

        Any other line is skipped.

        :param finp: object providing ``readline()`` that returns ``''`` at
            end of input.
        """
        for raw_line in iter(finp.readline, ''):
            entry = raw_line.rstrip('\n').lstrip()

            # Drop the comment part, if any.
            comment_start = entry.find('##')
            if comment_start != -1:
                entry = entry[:comment_start]

            if '=' in entry:
                key, value = entry.split('=', 1)
                if key.lower().startswith('formula'):
                    self.filter.append(
                        encode_formula_to_array(parse_formula(value)))
                continue

            keyword = entry.lower()
            if keyword.startswith('dict_form'):
                self.vector_form = False
            elif keyword.startswith('end'):
                if not self.vector_form:
                    self.required_fields = ['Formula']
                    for position, packed in enumerate(self.filter):
                        self.filter[position] = decode_formula_from_array(
                            packed)
                    self.rejected = self.__rejected_formula
                return
Ejemplo n.º 2
0
    def _pipe_from_textfile(self, finp):
        """Restore the scorer settings from an open text stream.

        Lines are pulled one at a time with ``finp.readline()`` until the
        stream is exhausted or a line starting with ``end`` is reached, so
        nothing past the ``end`` line is consumed. For every line the trailing
        newline and leading whitespace are removed and everything from the
        first ``##`` onwards is discarded. Keywords are matched by prefix and
        case-insensitively:

        * ``formula=<text>,<score>`` -- the formula text is parsed, encoded and
          appended to ``self.formulas``; ``<score>`` is converted with
          ``float`` and appended to ``self.scores``;
        * ``unknown_score=<value>`` -- stored as a float in
          ``self.unknown_score``;
        * ``dict_form`` -- sets ``self.vector_form`` to ``False``;
        * ``end`` -- stops reading. When ``self.vector_form`` is false at that
          point, ``self.required_fields`` becomes ``['Formula']``, every entry
          of ``self.formulas`` is decoded in place and
          ``self.process_molecular_candidate_record`` is rebound to the
          formula-based implementation.

        Any other line is skipped.

        :param finp: object providing ``readline()`` that returns ``''`` at
            end of input.
        """
        for raw_line in iter(finp.readline, ''):
            entry = raw_line.rstrip('\n').lstrip()

            # Drop the comment part, if any.
            comment_start = entry.find('##')
            if comment_start != -1:
                entry = entry[:comment_start]

            if '=' in entry:
                key, value = entry.split('=', 1)
                key = key.lower()
                if key.startswith('formula'):
                    fields = value.split(',')
                    self.formulas.append(
                        encode_formula_to_array(parse_formula(fields[0])))
                    self.scores.append(float(fields[1]))
                elif key.startswith('unknown_score'):
                    self.unknown_score = float(value)
                continue

            keyword = entry.lower()
            if keyword.startswith('dict_form'):
                self.vector_form = False
            elif keyword.startswith('end'):
                if not self.vector_form:
                    self.required_fields = ['Formula']
                    for position, packed in enumerate(self.formulas):
                        self.formulas[position] = decode_formula_from_array(
                            packed)
                    self.process_molecular_candidate_record = self.__process_molecular_candidate_record_formula
                return
Ejemplo n.º 3
0
    def __repr__(self):
        """Return ``FormulasFilter(...)`` listing every stored formula.

        Each entry of ``self.filter`` is rendered through
        ``formula_to_string()`` and wrapped in double quotes; in vector form
        the entry is first passed through ``decode_formula_from_array``.
        """
        quoted = [
            '"%s"' % (decode_formula_from_array(entry)
                      if self.vector_form else entry).formula_to_string()
            for entry in self.filter
        ]
        return "FormulasFilter(%s)" % ', '.join(quoted)
Ejemplo n.º 4
0
    def __repr__(self):
        """Return ``FormulaScorer(...)`` listing every formula with its score.

        Each item is rendered as ``"<formula>"=<score>``, where the formula
        text comes from ``formula_to_string()``. In vector form the stored
        entry is first passed through ``decode_formula_from_array``. The score
        for the formula at position ``i`` is ``self.scores[i]``.

        Building the representation has no side effects: the stray debugging
        ``print`` of every score that used to run in vector form is gone.
        """
        items = []
        for position, formula in enumerate(self.formulas):
            if self.vector_form:
                formula = decode_formula_from_array(formula)
            items.append('"%s"=%s' % (formula.formula_to_string(),
                                      self.scores[position]))
        return 'FormulaScorer(%s)' % ', '.join(items)
Ejemplo n.º 5
0
    def _pipe_to_textfile(self, fout, indent=''):
        """Write the filter settings to ``fout`` as text lines.

        The first line is ``vector_form`` or ``dict_form`` depending on
        ``self.vector_form``; it is followed by one ``formula=<text>`` line
        per entry of ``self.filter``. In vector form each entry is passed
        through ``decode_formula_from_array`` before ``formula_to_string()``
        is called on it.

        :param fout: object with a ``write`` method.
        :param indent: string put in front of every written line.
        """
        mode = 'vector_form' if self.vector_form else 'dict_form'
        fout.write('%s%s\n' % (indent, mode))

        for entry in self.filter:
            formula = (decode_formula_from_array(entry)
                       if self.vector_form else entry)
            fout.write('%s%s=%s\n' %
                       (indent, 'formula', formula.formula_to_string()))
Ejemplo n.º 6
0
    def __init__(self, formulas=None, use_vector_form=True):
        """Create the filter and fill it from ``formulas``.

        ``self.required_fields`` is ``['FormulaVector']`` in vector form and
        ``['Formula']`` otherwise.

        :param formulas: ``None`` (empty filter), a comma-separated string, a
            ``dict``, a ``numpy.ndarray``, or a ``list`` whose items are any
            of those three. Strings are split on ``,`` and each piece goes
            through ``parse_formula`` and ``encode_formula_to_array``; dicts
            are encoded directly; arrays are stored unchanged.
        :param use_vector_form: when false, every collected entry is finally
            converted with ``decode_formula_from_array``.
        :raises TypeError: for any other argument or list item type.
        """
        self.filter = []
        self.vector_form = use_vector_form
        self.required_fields = (['FormulaVector']
                                if self.vector_form else ['Formula'])

        if formulas is None:
            return

        # A lone str / dict / ndarray is treated exactly like a one-item list.
        candidates = formulas if isinstance(formulas, list) else [formulas]
        for candidate in candidates:
            if isinstance(candidate, str):
                for piece in candidate.split(','):
                    self.filter.append(
                        encode_formula_to_array(parse_formula(piece)))
            elif isinstance(candidate, dict):
                self.filter.append(encode_formula_to_array(candidate))
            elif isinstance(candidate, np.ndarray):
                self.filter.append(candidate)
            else:
                raise TypeError(
                    'Wrong type argument for FormulasFilter initialization! str, dict, list of (dict or str) supported only!'
                )

        if not self.vector_form:
            for position, packed in enumerate(self.filter):
                self.filter[position] = decode_formula_from_array(packed)
Ejemplo n.º 7
0
    def _pipe_to_textfile(self, fout, indent=''):
        """Write the scorer settings to ``fout`` as text lines.

        The first line is ``vector_form`` or ``dict_form`` depending on
        ``self.vector_form``. It is followed by one ``formula=<text>,<score>``
        line per entry of ``self.formulas`` (score taken from the same
        position in ``self.scores``) and a closing ``unknown_score=<value>``
        line. In vector form each entry is passed through
        ``decode_formula_from_array`` before ``formula_to_string()`` is called.

        The dict-form branch previously had a misplaced parenthesis: the score
        was handed to ``fout.write`` as a second argument instead of being
        part of the format tuple, so saving a dict-form scorer raised
        ``TypeError``. Both forms now share one correctly formed write.

        :param fout: object with a ``write`` method.
        :param indent: string put in front of every written line.
        """
        mode = 'vector_form' if self.vector_form else 'dict_form'
        fout.write('%s%s\n' % (indent, mode))

        for position, formula in enumerate(self.formulas):
            if self.vector_form:
                formula = decode_formula_from_array(formula)
            fout.write('%s%s=%s,%s\n' %
                       (indent, 'formula', formula.formula_to_string(),
                        self.scores[position]))

        fout.write('%s%s=%s\n' % (indent, 'unknown_score', self.unknown_score))
Ejemplo n.º 8
0
    def _get_next_hdf5_record(self):
        """Advance to the next row and build its ``MolecularRecord``.

        ``self.mzcurrent`` is incremented first; once it exceeds
        ``self.mzmax_index`` a ``StopIteration`` is raised. Otherwise the
        record always receives 'MZ', 'Mass' and 'Charge', plus each optional
        field ('FPT', 'Frag', 'CFM', 'CFM2', 'SMILES', 'IDs',
        'InChIKeyValues', 'InChIKey', 'ShortInChI', 'InChI', 'FormulaVector',
        'Formula', 'ElementVector', 'MetaLike') that is named in
        ``self.required_fields``.

        :return: the populated record for the new current row.
        :raises StopIteration: when no rows are left.
        """
        self.mzcurrent += 1
        if self.mzcurrent > self.mzmax_index:
            raise StopIteration()

        record = MolecularRecord()
        row = self.mzcurrent
        wanted = self.required_fields

        def ascii_text(begin, end):
            # Turn a slice of the shared ASCII dataset into a text string.
            chunk = self.ascii_dataset[begin:end]
            if python_version == 2:
                return str(bytearray(chunk))
            return bytes(list(chunk)).decode('ascii')

        def spectra(index_dataset, values_name):
            # Three (start, stop) pairs are stored per row; a negative start
            # in any of them yields an empty list instead of three slices.
            bounds = [(index_dataset[row, level, 0],
                       index_dataset[row, level, 1]) for level in range(3)]
            if all(start >= 0 for start, _ in bounds):
                values = getattr(self, values_name)
                return [values[start:stop, :] for start, stop in bounds]
            return []

        if self.charged:
            record['MZ'] = self.mz_mass_charge_dataset[row, 0]
            record['Mass'] = self.mz_mass_charge_dataset[row, 1]
            record['Charge'] = int(self.mz_mass_charge_dataset[row, 2])
        else:
            record['MZ'] = self.mz_dataset[row]
            record['Mass'] = record['MZ']
            record['Charge'] = 0

        if 'FPT' in wanted:
            record['FPT'] = self.fpt_dataset[row, :]

        if 'Frag' in wanted:
            first = self.fragprint_index_dataset[row, 0]
            last = self.fragprint_index_dataset[row, 1]
            record['Frag'] = list(self.fragprint_values_dataset[first:last])

        if 'CFM' in wanted:
            negative = spectra(self.CFM_negative_index_dataset,
                               'CFM_values_dataset')
            positive = spectra(self.CFM_positive_index_dataset,
                               'CFM_values_dataset')
            record['CFM'] = [negative, positive]

        if 'CFM2' in wanted:
            negative = spectra(self.CFM2_negative_index_dataset,
                               'CFM2_values_dataset')
            positive = spectra(self.CFM2_positive_index_dataset,
                               'CFM2_values_dataset')
            record['CFM2'] = [negative, positive]

        if 'SMILES' in wanted:
            first = self.smiles_dataset[row, 0]
            last = self.smiles_dataset[row, 1]
            record['SMILES'] = ascii_text(first, last)

        if 'IDs' in wanted:
            first = self.ids_dataset[row, 0]
            last = self.ids_dataset[row, 1]
            record['IDs'] = ascii_text(first, last)

        want_key_values = 'InChIKeyValues' in wanted
        want_key = 'InChIKey' in wanted
        if want_key_values or want_key:
            key_values = self.inchikey_values_dataset[row, :]
            if want_key_values:
                record['InChIKeyValues'] = key_values
            if want_key:
                record['InChIKey'] = inchikey_from_inchikeyvalues(key_values)

        want_short_inchi = 'ShortInChI' in wanted
        want_inchi = 'InChI' in wanted
        if want_short_inchi or want_inchi:
            # The InChI text is stored as four consecutive parts per row.
            parts = [
                ascii_text(self.inchi_dataset[row, part, 0],
                           self.inchi_dataset[row, part, 1])
                for part in range(4)
            ]
            if want_short_inchi:
                record['ShortInChI'] = '%s/%s' % (parts[0], parts[1])
            if want_inchi:
                record['InChI'] = '%s/%s%s%s' % tuple(parts)

        want_vector = 'FormulaVector' in wanted
        want_formula = 'Formula' in wanted
        if want_vector or want_formula:
            vector = self.formulavector_dataset[row, :]
            if want_vector:
                record['FormulaVector'] = vector
            if want_formula:
                record['Formula'] = decode_formula_from_array(vector)

        if 'ElementVector' in wanted:
            record['ElementVector'] = self.elementvector_dataset[row, :]

        # Likeness scores
        if 'MetaLike' in wanted:
            record['MetaLike'] = self.metalikeness_dataset[row]

        return record
Ejemplo n.º 9
0
    def setup_scorer(self, formulas, scores, unknown_score=0.0):
        """Reset the scorer and load matching formulas and scores.

        ``self.formulas`` and ``self.scores`` are emptied and refilled, and
        ``self.unknown_score`` is set from the argument.

        :param formulas: a comma-separated string, a ``dict``, a
            ``numpy.ndarray``, or a ``list`` whose items are any of those.
            Strings are split on ``,`` and each piece goes through
            ``parse_formula`` and ``encode_formula_to_array``; dicts are
            encoded directly; arrays are stored unchanged.
        :param scores: a comma-separated string, a ``float``, an ``int``, or a
            ``list`` of floats, ints, strings or dicts. Ints and string pieces
            are converted with ``float``; floats and dicts are stored as
            given.
        :param unknown_score: value stored in ``self.unknown_score``.
        :raises TypeError: for unsupported argument types, or when the number
            of collected scores differs from the number of formulas.

        When ``self.vector_form`` is false the collected formulas are finally
        converted with ``decode_formula_from_array``.
        """
        self.formulas = []
        self.scores = []
        self.unknown_score = unknown_score

        # A lone str / dict / ndarray is treated exactly like a one-item list.
        candidates = formulas if isinstance(formulas, list) else [formulas]
        for candidate in candidates:
            if isinstance(candidate, str):
                for piece in candidate.split(','):
                    self.formulas.append(
                        encode_formula_to_array(parse_formula(piece)))
            elif isinstance(candidate, dict):
                self.formulas.append(encode_formula_to_array(candidate))
            elif isinstance(candidate, np.ndarray):
                self.formulas.append(candidate)
            else:
                raise TypeError(
                    'Wrong type argument (formulas) for FormulaVectors initialization! str, formula, list of (formula, formulavector or str) supported only!'
                )

        def add_scores_from_text(text):
            # Every comma-separated piece becomes one float score.
            for piece in text.split(','):
                self.scores.append(float(piece))

        if isinstance(scores, list):
            for value in scores:
                if isinstance(value, (float, dict)):
                    self.scores.append(value)
                elif isinstance(value, int):
                    self.scores.append(float(value))
                elif isinstance(value, str):
                    add_scores_from_text(value)
                else:
                    raise TypeError(
                        'Wrong type argument (scores) for FormulaScorer initialization! str, float, int, list of (float, int or str) or dictionary supported only!'
                    )
        elif isinstance(scores, str):
            add_scores_from_text(scores)
        elif isinstance(scores, float):
            self.scores.append(scores)
        elif isinstance(scores, int):
            self.scores.append(float(scores))
        else:
            raise TypeError(
                'Wrong type argument (scores) for FormulaScorer initialization! str, float, int, list of (float, int or str) supported only!'
            )

        if len(self.scores) != len(self.formulas):
            raise TypeError(
                'Number of formulas and number of scores supplied do not match!'
            )

        if not self.vector_form:
            for position, packed in enumerate(self.formulas):
                self.formulas[position] = decode_formula_from_array(packed)
Ejemplo n.º 10
0
def generate_candidate_html(mainHTML,
                            candidate,
                            outpath,
                            index,
                            subpath,
                            merged=False):
    """Write one HTML table row describing ``candidate`` to ``mainHTML``.

    The row cell gets background ``#ccffcc`` when ``candidate['Annotation']``
    equals ``'Correct'``, ``#e0e0e0`` for any other annotation, and no
    background when the key is absent. When the candidate has a non-empty
    SMILES and ``generate_candidate_image`` reports success, the details are
    placed next to an ``<img>`` pointing at
    ``<subpath>/candidate_<index>.png``.

    A candidate without a 'SMILES' key is handled like one with an empty
    SMILES (no image); previously this raised ``UnboundLocalError``.

    :param mainHTML: object with a ``write`` method receiving the HTML text.
    :param candidate: mapping-like object; 'TotalScore', 'Scores', 'Mass' and
        'Charge' are read unconditionally, while 'Annotation', 'SMILES',
        'Formula', 'FormulaVector', 'Adduct', 'Isotope' and 'InChI' are used
        when present.
    :param outpath: directory in which the candidate image file is created.
    :param index: value used in the image file name.
    :param subpath: path prefix used in the ``<img src=...>`` attribute.
    :param merged: when true, an Adduct/Isotope line is written as well.
    """
    if 'Annotation' in candidate:
        if candidate['Annotation'] == 'Correct':
            mainHTML.write('<tr><td bgcolor="#ccffcc">\n')
        else:
            mainHTML.write('<tr><td bgcolor="#e0e0e0">\n')
    else:
        mainHTML.write('<tr><td>\n')

    # Default to '' so that the image step is skipped, instead of reading an
    # unbound local, when the candidate carries no SMILES.
    has_smiles = 'SMILES' in candidate
    smiles = candidate['SMILES'] if has_smiles else ''

    has_image = False
    if smiles != '':
        fname = os.path.join(outpath, 'candidate_%s.png' % index)
        if generate_candidate_image(fname, smiles):
            has_image = True

    if has_image:
        mainHTML.write(
            '<table><tr><td><img src="%s/candidate_%s.png"></td><td>\n' %
            (subpath, index))

    mainHTML.write('<b>Total Score:</b> %s\n' % candidate['TotalScore'])
    mainHTML.write('<br><b>Individual Scores:</b>\n')
    for score in candidate['Scores']:
        mainHTML.write('<br><b>%s:</b> %s\n' %
                       (score, candidate['Scores'][score]))

    mainHTML.write('<br><b>Mass:</b> %s, <b>Charge:</b> %s' %
                   (candidate['Mass'], candidate['Charge']))

    # 'Formula' takes precedence over 'FormulaVector' when both are present.
    formula = ''
    if 'Formula' in candidate:
        formula = candidate['Formula'].formula_to_string()
    elif 'FormulaVector' in candidate:
        formula = decode_formula_from_array(
            candidate['FormulaVector']).formula_to_string()

    if formula != '':
        mainHTML.write(', <b>Formula:</b> %s' % formula)

    if merged:
        adduct = candidate['Adduct'] if 'Adduct' in candidate else ''
        isotope = candidate['Isotope'] if 'Isotope' in candidate else ''
        mainHTML.write('<br><b>Adduct:</b> %s, <b>Isotope:</b> %s\n' %
                       (adduct, isotope))

    if has_smiles:
        mainHTML.write('<br><b>SMILES:</b> %s\n' % smiles)

    if 'InChI' in candidate:
        mainHTML.write('<br><b>InChI</b>=1S/%s\n' % candidate['InChI'])
        inchi = 'InChI=1S/%s' % candidate['InChI']
        inchikey = inchikey_from_inchi(inchi)
        mainHTML.write('<br><b>InChIKey:</b> %s\n' % inchikey)
        # The InChIKey doubles as the button id handed to CallPubChem().
        mainHTML.write(
            '<br><button onclick="CallPubChem(this.id)" id="%s">PubChem</button><br>\n'
            % (inchikey))

    if has_image:
        mainHTML.write('</td></tr></table>\n')

    mainHTML.write('</td></tr>\n')