def ExecuteOperation(vStack,opStack, subformulas):
	"""Apply the operator on top of opStack to operand(s) from vStack.

	Pops the operator token and its operand token(s), builds a new
	"Subformula" token whose string (.valor) is the concatenation of the
	operand/operator strings and whose truth value (.booleano) is the
	result of the connective, then pushes it on vStack and records it in
	subformulas.

	Returns the (mutated) vStack, opStack and subformulas.
	"""
	op = opStack.pop()
	rightOperand = vStack.pop()

	# Unary negation consumes a single operand.
	if op.valor == u'¬':
		subFormula = Tokenizer(op.valor + rightOperand.valor, "Subformula")
		subFormula.setValue(not rightOperand.booleano)
		vStack.append(subFormula)
		subformulas.append(subFormula)
		return vStack, opStack, subformulas

	# Binary connectives consume a second (left) operand.
	leftOperand = vStack.pop()
	subFormula = Tokenizer(leftOperand.valor + op.valor + rightOperand.valor, "Subformula")
	if op.valor == u'->':
		# Material implication: false only when antecedent true and consequent false.
		subFormula.setValue((not leftOperand.booleano) or rightOperand.booleano)
	elif op.valor == u'<->':
		# Biconditional: true exactly when both sides agree.
		subFormula.setValue(leftOperand.booleano == rightOperand.booleano)
	elif op.valor == u'∨':
		subFormula.setValue(leftOperand.booleano or rightOperand.booleano)
	elif op.valor == u'∧':
		subFormula.setValue(leftOperand.booleano and rightOperand.booleano)

	vStack.append(subFormula)
	subformulas.append(subFormula)
	return vStack, opStack, subformulas
def retiraToken(expressao,i):
	"""Extract the next token from *expressao* starting at index *i*.

	Skips leading whitespace, then classifies the next lexeme as a
	connective, boolean constant, proposition letter, punctuation or
	negation, returning a (Token, next_index) pair.  Terminates the
	program with "Cadeia Inválida" on any character outside the alphabet.
	"""
	# Skip every whitespace character.  The original used a single `if`,
	# so two consecutive spaces (or a space followed by a newline) made
	# the second whitespace char fall through to the final else and abort.
	while expressao[i] == " " or expressao[i] == "\n":
		i = i + 1

	ch = expressao[i]
	if ch == u'∨' or ch == u'∧':
		return Tokenizer(ch, "Conectivos"), i + 1
	elif ch == "t":
		if expressao[i:i+4] == "true":
			token = Tokenizer("true", "BooleanSymbols")
			token.setValue(True)
			return token, i + 4
		# A lone 't' is an ordinary proposition letter.
		return Tokenizer(ch, "Proposicionais"), i + 1
	elif ch == "f":
		if expressao[i:i+5] == "false":
			token = Tokenizer("false", "BooleanSymbols")
			token.setValue(False)
			return token, i + 5
		# A lone 'f' is an ordinary proposition letter.
		return Tokenizer(ch, "Proposicionais"), i + 1
	elif ch == "(" or ch == ")":
		return Tokenizer(ch, "Pontuacao"), i + 1
	elif ch == u'¬':
		return Tokenizer(ch, "Negacao"), i + 1
	elif ch.isalpha():
		return Tokenizer(ch, "Proposicionais"), i + 1
	elif ch == u'<':
		if expressao[i:i+3] == u'<->':
			return Tokenizer(u'<->', "Conectivos"), i + 3
		sys.exit("Cadeia Inválida")
	elif ch == u'-':
		if expressao[i:i+2] == u'->':
			return Tokenizer(u'->', "Conectivos"), i + 2
		sys.exit("Cadeia Inválida")
	else:
		sys.exit("Cadeia Inválida")
Example #3
0
            mol = self.nrm.normalize(mol)
            mol = self.lfc.choose(mol)
            mol = self.uc.uncharge(mol)
            return Chem.MolToSmiles(mol, isomericSmiles=False, canonical=True)
        else:
            return None


if __name__ == "__main__":

    with open('data/canonical_smiles.smi', 'r') as file:
        smiles = [line.rstrip() for line in file]

    print("Initial number of sequences %i" % len(smiles))
    p = Preprocess()
    t = Tokenizer()

    # Normalization, uncharging, removing chirality and light fragments
    nn_smi = [p.clean(smile) for smile in tqdm(smiles)]
    unn_smi = list(set([smile for smile in nn_smi if smile]))

    # Limit sequence length 34-128
    cl_smi = []
    for smile in unn_smi:
        if 34 <= len(t.tokenize(smile)) <= 128:
            cl_smi.append(smile)

    print("Number of sequences after cleaning %i" % len(cl_smi))

    with open('data/cleaned_smiles.smi', 'w') as file:
        for line in cl_smi: