def ExecuteOperation(vStack, opStack, subformulas):
    """Pop one operator (and its operand tokens) and push the evaluated result.

    Pops the top operator from ``opStack`` and the operand token(s) it needs
    from ``vStack``, builds a new ``Tokenizer`` token of type "Subformula"
    whose ``valor`` is the concatenated sub-expression text and whose boolean
    value is the evaluated result, then pushes that token onto ``vStack`` and
    appends it to ``subformulas``.

    Parameters:
        vStack: stack (list) of operand tokens; mutated in place.
        opStack: stack (list) of operator tokens; mutated in place.
        subformulas: accumulator (list) of every sub-formula token created.

    Returns:
        The (vStack, opStack, subformulas) triple, mutated in place.
    """
    op = opStack.pop()
    rightOperand = vStack.pop()

    # Unary negation consumes a single operand; handle it and return early.
    if op.valor == u'¬':
        subFormula = Tokenizer('' + op.valor + rightOperand.valor, "Subformula")
        subFormula.setValue(not rightOperand.booleano)
        vStack.append(subFormula)
        subformulas.append(subFormula)
        return vStack, opStack, subformulas

    # All remaining operators are binary: pop the left operand as well.
    leftOperand = vStack.pop()
    subFormula = Tokenizer('' + leftOperand.valor + op.valor + rightOperand.valor,
                           "Subformula")

    if op.valor == u'->':
        # Material implication: false only when antecedent is true and
        # consequent is false, i.e. (not left) or right.
        subFormula.setValue(not leftOperand.booleano or rightOperand.booleano)
    elif op.valor == u'<->':
        # Biconditional: true exactly when both sides agree.
        subFormula.setValue(leftOperand.booleano == rightOperand.booleano)
    elif op.valor == u'∨':
        subFormula.setValue(leftOperand.booleano or rightOperand.booleano)
    elif op.valor == u'∧':
        subFormula.setValue(leftOperand.booleano and rightOperand.booleano)

    vStack.append(subFormula)
    subformulas.append(subFormula)
    return vStack, opStack, subformulas
def retiraToken(expressao, i):
    """Extract the next token of *expressao* starting at index *i*.

    Skips leading whitespace, then classifies the next lexeme as one of:
    "Conectivos" (∨, ∧, ->, <->), "BooleanSymbols" (true/false, with their
    boolean value set), "Proposicionais" (a single letter), "Pontuacao"
    (parentheses) or "Negacao" (¬).

    Parameters:
        expressao: the input string being tokenized.
        i: index at which to start scanning.

    Returns:
        A ``(Token, next_index)`` pair, where ``next_index`` is the position
        just past the consumed lexeme.

    Exits the program with "Cadeia Inválida" on any unrecognized character
    (and, after the fix below, on an input that ends in whitespace).
    """
    # BUGFIX: the original used `if`, skipping at most ONE whitespace char,
    # so two consecutive spaces/newlines misclassified the second one. It
    # also never re-checked bounds, so trailing whitespace raised IndexError.
    while i < len(expressao) and (expressao[i] == " " or expressao[i] == "\n"):
        i = i + 1
    if i >= len(expressao):
        sys.exit("Cadeia Inválida")

    # NOTE: order matters — 't'/'f' are tested before the generic isalpha()
    # branch so that "true"/"false" are recognized as boolean literals.
    if expressao[i] == u'∨' or expressao[i] == u'∧':
        Token = Tokenizer(expressao[i], "Conectivos")
        return Token, i + 1
    elif expressao[i] == "t":
        if expressao[i:i + 4] == "true":
            Token = Tokenizer(expressao[i:i + 4], "BooleanSymbols")
            Token.setValue(True)
            return Token, i + 4
        else:
            # A lone 't' (not starting "true") is a propositional variable.
            Token = Tokenizer(expressao[i], "Proposicionais")
            return Token, i + 1
    elif expressao[i] == "f":
        if expressao[i:i + 5] == "false":
            Token = Tokenizer(expressao[i:i + 5], "BooleanSymbols")
            Token.setValue(False)
            return Token, i + 5
        else:
            Token = Tokenizer(expressao[i], "Proposicionais")
            return Token, i + 1
    elif expressao[i] == "(" or expressao[i] == ")":
        Token = Tokenizer(expressao[i], "Pontuacao")
        return Token, i + 1
    elif expressao[i] == u"¬":
        Token = Tokenizer(expressao[i], "Negacao")
        return Token, i + 1
    elif expressao[i].isalpha():
        Token = Tokenizer(expressao[i], "Proposicionais")
        return Token, i + 1
    elif expressao[i] == u'<':
        if expressao[i:i + 3] == u'<->':
            Token = Tokenizer(expressao[i:i + 3], "Conectivos")
            return Token, i + 3
        else:
            sys.exit("Cadeia Inválida")
    elif expressao[i] == u'-':
        if expressao[i:i + 2] == u'->':
            Token = Tokenizer(expressao[i:i + 2], "Conectivos")
            return Token, i + 2
        else:
            sys.exit("Cadeia Inválida")
    else:
        sys.exit("Cadeia Inválida")
mol = self.nrm.normalize(mol) mol = self.lfc.choose(mol) mol = self.uc.uncharge(mol) return Chem.MolToSmiles(mol, isomericSmiles=False, canonical=True) else: return None if __name__ == "__main__": with open('data/canonical_smiles.smi', 'r') as file: smiles = [line.rstrip() for line in file] print("Initial number of sequences %i" % len(smiles)) p = Preprocess() t = Tokenizer() # Normalization, uncharging, removing chirality and light fragments nn_smi = [p.clean(smile) for smile in tqdm(smiles)] unn_smi = list(set([smile for smile in nn_smi if smile])) # Limit sequence length 34-128 cl_smi = [] for smile in unn_smi: if 34 <= len(t.tokenize(smile)) <= 128: cl_smi.append(smile) print("Number of sequences after cleaning %i" % len(cl_smi)) with open('data/cleaned_smiles.smi', 'w') as file: for line in cl_smi: