Example #1
0
def dict_data_to_sbml(dict_data,
                      dict_orthogroups=None,
                      dict_orthologues=None,
                      strict_match=True):
    """
    Create the sbml of a study organism from a model sbml and orthology data.

    1./ Build a mapping model gene id -> study gene ids, from dict_orthogroups
        if given, otherwise from dict_orthologues.
    2./ Read the sbml of the model and keep the reactions whose notes contain
        a non-empty GENE_ASSOCIATION.
    3./ For each of these reactions:
        Parse the gene association into subsets
        (ex: (gene-a and gene-b) or gene-c = [(gene-a, gene-b), (gene-c,)])
        A subset is kept only if every gene of the subset has a study gene in
        the mapping; its genes are then replaced by the study genes.
        The reaction is kept if at least one subset is kept.
    4./ Remove the other reactions and the species that are no longer used,
        then write the new sbml file.

    Nothing is written (the function returns None early) when the two
    organisms share no orthologue or when no reaction can be kept.

    Parameters
    ----------
    dict_data: dict
        {'study_id': study_id,
        'model_id' : model_id,
        'sbml_template': path to sbml of model,
        'output': path to the output sbml,
        'verbose': bool (optional), if true print information
        }
    dict_orthogroups: dict or None
        k = orthogroup_id, v = {k = organism id, v = iterable of gene ids}
    dict_orthologues: dict or None
        k = organism id, v = {k = gene id, v = {k = other organism id,
        v = iterable of gene ids}}; only used when dict_orthogroups is empty
    strict_match: bool
        not used by this function (kept for interface compatibility)

    Raises
    ------
    ValueError
        if neither dict_orthogroups nor dict_orthologues is given
    """
    study_id = dict_data['study_id']
    model_id = dict_data['model_id']
    sbml_template = dict_data['sbml_template']
    output = dict_data['output']
    verbose = dict_data.get('verbose')

    #k = gene_id of the model, v = genes ids of the study organism
    sub_dict_orth = {}
    if dict_orthogroups:
        if verbose:
            print(
                "*Extracting orthogroups data to create sbml of {0} from {1}".
                format(study_id, model_id))
        for orthogroup in dict_orthogroups.values():
            try:
                model_genes = orthogroup[model_id]
                study_genes = orthogroup[study_id]
            except KeyError:
                #orthogroup not shared by the two organisms
                continue
            for model_gene in model_genes:
                sub_dict_orth.setdefault(model_gene, set()).update(study_genes)
    elif dict_orthologues:
        if verbose:
            print(
                "*Extracting orthologues data to create sbml of {0} from {1}".
                format(study_id, model_id))
        #a model without any entry is handled like a model sharing no orthologue
        for gene_id, gene_dict in dict_orthologues.get(model_id, {}).items():
            try:
                sub_dict_orth[gene_id] = gene_dict[study_id]
            except KeyError:
                pass
    else:
        raise ValueError("Must give one dict of orthogroups or orthologue")

    if not sub_dict_orth:
        if verbose:
            print("\t{0} and {1} don't share any ortholgue".format(
                study_id, model_id))
        return

    reader = libsbml.SBMLReader()
    document_to_compare = reader.readSBML(sbml_template)
    for i in range(document_to_compare.getNumErrors()):
        print(document_to_compare.getError(i).getMessage())
    model_to_compare = document_to_compare.getModel()

    #list of (reaction, gene association), notes are parsed only once
    reactions_with_genes = []
    for rxn in model_to_compare.getListOfReactions():
        ga = sp.parseNotes(rxn).get("GENE_ASSOCIATION", [None])[0]
        if ga:
            reactions_with_genes.append((rxn, ga))
    if verbose:
        print("\tSbml of {0} contains {1}/{2} reactions with genes assocation".
              format(model_id, len(reactions_with_genes),
                     len(model_to_compare.getListOfReactions())))

    #k = reaction id, v = gene association rewritten with study genes
    dict_rxn_ga = {}
    for rxn, ga in reactions_with_genes:
        ga_for_gbr = re.sub(r" or ", "|", ga)
        ga_for_gbr = re.sub(r" and ", "&", ga_for_gbr)
        ga_for_gbr = re.sub(r"\s", "", ga_for_gbr)
        if re.search(r"\||&", ga_for_gbr):
            to_compare_ga_subsets = list(gbr.compile_input(ga_for_gbr))
        else:
            #single gene: only the parentheses have to be removed
            ga_for_gbr = re.sub(r"\(|\)", "", ga_for_gbr)
            to_compare_ga_subsets = [[ga_for_gbr]]

        #ex: to_compare_ga_subsets = [('a','c','d'),('c',)]
        #    sub_dict_orth = {'a':['a_a'],'c':['c_c'], 'd':['d_d']}
        study_ga_subsets = []
        for to_compare_subset in to_compare_ga_subsets:
            study_subset = set()
            for gene in to_compare_subset:
                if gene not in sub_dict_orth:
                    #one gene without orthologue invalidates the whole subset
                    study_subset = set()
                    break
                study_subset.update(sub_dict_orth[gene])
            if study_subset:
                study_ga_subsets.append(study_subset)

        if study_ga_subsets:
            #genes are sorted so that the output does not depend on set order
            study_ga = " or ".join(
                "(" + " and ".join(sorted(subset)) + ")"
                for subset in study_ga_subsets)
            if verbose:
                print("\t\tAdding %s" % rxn.id)
                print("\t\tGENE_ASSOCIATION: %s" % (study_ga))
            dict_rxn_ga[rxn.id] = study_ga

    if not dict_rxn_ga:
        if verbose:
            print(
                "\tNo reaction added from {0} to {1} because of missing orthologues"
                .format(model_id, study_id))
        return

    #ids are collected before any removal
    rxn_id_to_remove = {
        rxn.id for rxn in model_to_compare.getListOfReactions()
    }.difference(dict_rxn_ga)
    if verbose:
        print("\tRemoving %s unused reactions" % len(rxn_id_to_remove))
    for rxn_id in rxn_id_to_remove:
        model_to_compare.removeReaction(rxn_id)

    cpd_id_to_preserve = set()
    for rxn_id, study_ga in dict_rxn_ga.items():
        rxn = model_to_compare.getElementBySId(rxn_id)
        #update notes
        notes_in_dict = sp.parseNotes(rxn)
        notes_in_dict["GENE_ASSOCIATION"] = [study_ga]
        paragraphs = "".join(
            "<p>" + k + ": " + v + "</p>"
            for k, v_list in notes_in_dict.items() for v in v_list)
        notes = ("<body xmlns=\"http://www.w3.org/1999/xhtml\">" +
                 paragraphs + "</body>")
        rxn.setNotes(notes)
        cpd_id_to_preserve.update(
            p.getSpecies() for p in rxn.getListOfProducts())
        cpd_id_to_preserve.update(
            r.getSpecies() for r in rxn.getListOfReactants())

    #ids are collected before any removal
    all_species = [cpd.id for cpd in model_to_compare.getListOfSpecies()]
    for cpd_id in all_species:
        if cpd_id not in cpd_id_to_preserve:
            model_to_compare.removeSpecies(cpd_id)

    new_id = os.path.basename(os.path.splitext(output)[0])
    model_to_compare.setId(new_id)
    libsbml.writeSBMLToFile(document_to_compare, output)
Example #2
0
def compare_multiple_sbml(sbml_path, output_folder):
    """
    Compare 1-n sbml, create two output files reactions.tsv and metabolites.tsv
    with the reactions/metabolites in each sbml

    reactions.tsv: one row per reaction id, with for each sbml (sorted by path)
    a 1/0 presence column, then for each sbml the genes of its
    GENE_ASSOCIATION note joined by ';', then the reaction formula taken from
    the first sbml in which the reaction was found.
    metabolites.tsv: one row per metabolite id (sorted), with a 1/0 presence
    column for each sbml.

    Parameters
    ----------
    sbml_path: str
        path to a folder containing sbmls or multiple sbml paths separated by a ','
    output_folder: str
        path to the output folder, created if it does not exist

    Raises
    ------
    FileNotFoundError
        if sbml_path is not a folder and one of the listed files does not exist
    """
    if not os.path.exists(output_folder):
        print("Creating %s" % output_folder)
        os.makedirs(output_folder)
    else:
        print(
            "%s already exist, old comparison output folders will be overwritten"
            % output_folder)

    if os.path.isdir(sbml_path):
        #only the files directly inside the folder are used
        all_files = [
            os.path.join(sbml_path, f) for f in next(os.walk(sbml_path))[2]
        ]
    else:
        all_files = sbml_path.split(",")
        for sbml_file in all_files:
            if not os.path.exists(sbml_file):
                raise FileNotFoundError(
                    "No SBML file (--sbml/sbml_path) accessible at " +
                    sbml_file)

    #columns always follow the sorted order of the sbml paths
    sorted_files = sorted(all_files)
    base_names = [
        os.path.splitext(os.path.basename(sbml_file))[0]
        for sbml_file in sorted_files
    ]
    species_columns = list(base_names)
    gene_columns = [name + '_genes_assoc (sep=;)' for name in base_names]

    #k = reaction id, v = reaction of the first sbml containing it
    all_reactions = {}
    all_compounds = set()
    #k = sbml path, v = reactions of the model / set of reaction ids / set of metabolite ids
    reactions = {}
    reaction_ids = {}
    compounds = {}
    for sbml_file in all_files:
        sbml_model = read_sbml_model(sbml_file)
        reactions[sbml_file] = sbml_model.reactions
        reaction_ids[sbml_file] = {rxn.id for rxn in sbml_model.reactions}
        for rxn in sbml_model.reactions:
            all_reactions.setdefault(rxn.id, rxn)
        compounds[sbml_file] = {
            metabolite.id for metabolite in sbml_model.metabolites
        }
        all_compounds.update(compounds[sbml_file])

    reaction_file = os.path.join(output_folder, 'reactions.tsv')
    reaction_file_rows = []

    for reaction_id, first_reaction in all_reactions.items():
        reaction_presents = []
        reaction_genes = []
        for sbml_file in sorted_files:
            if reaction_id not in reaction_ids[sbml_file]:
                reaction_presents.append(0)
                reaction_genes.append('')
                continue
            reaction_presents.append(1)
            species_reaction = reactions[sbml_file].get_by_id(reaction_id)
            if 'GENE_ASSOCIATION' not in species_reaction.notes:
                reaction_genes.append('')
                continue
            ga_for_gbr = species_reaction.notes['GENE_ASSOCIATION']
            ga_for_gbr = re.sub(r" or ", "|", ga_for_gbr)
            ga_for_gbr = re.sub(r" and ", "&", ga_for_gbr)
            ga_for_gbr = re.sub(r"\s", "", ga_for_gbr)
            if re.search(r"\||&", ga_for_gbr):
                #flatten every subset returned for the gene association
                genes = [
                    gene for to_compare_subset in compile_input(ga_for_gbr)
                    for gene in to_compare_subset
                ]
            else:
                #single gene: only the parentheses have to be removed
                genes = [ga_for_gbr.replace('(', '').replace(')', '')]
            reaction_genes.append(';'.join(genes))

        row = [reaction_id] + reaction_presents + reaction_genes
        row.append(first_reaction.reaction)
        reaction_file_rows.append(row)

    #newline='' lets the csv module control the line endings
    with open(reaction_file, 'w', newline='') as output_reaction:
        csvwriter = csv.writer(output_reaction, delimiter='\t')
        csvwriter.writerow(
            ['reaction', *species_columns, *gene_columns, '_formula'])
        csvwriter.writerows(reaction_file_rows)

    compounds_file = os.path.join(output_folder, 'metabolites.tsv')
    #sorted so that the row order does not depend on set order
    compounds_rows = [
        [compound_id] + [
            1 if compound_id in compounds[sbml_file] else 0
            for sbml_file in sorted_files
        ] for compound_id in sorted(all_compounds)
    ]

    with open(compounds_file, 'w', newline='') as output_compound:
        csvwriter = csv.writer(output_compound, delimiter='\t')
        csvwriter.writerow(['metabolite', *sorted_files])
        csvwriter.writerows(compounds_rows)
Example #3
0
def padmet_to_sbml(padmet, output, model_id = None, obj_fct = None, sbml_lvl = 3, mnx_chem_prop = None, mnx_chem_xref = None, verbose = False):
    """
    Convert padmet file to sbml file.
    Specificity: 
    - ids are encoded for sbml using functions sbmlPlugin.convert_to_coded_id
    - level 2: flux bounds and objective coefficient are stored as kinetic law
      parameters, gene associations are only written in the reaction notes
    - level 3: the fbc package (version 1) is used for flux bounds, objective
      and gene associations (written as a model annotation)

    The module-level list all_ga is reset by each call.

    Parameters
    ----------
    padmet: str or padmet.classes.PadmetSpec/PadmetRef
        the pathname to the padmet file to convert, or PadmetSpec/PadmetRef object
    output: str
        the pathname to the sbml file to create
    model_id: str or None
        model id to set in sbml file, if None the basename of output without
        extension is used
    obj_fct: str
        the identifier of the objection function, the reaction to test in FBA
    sbml_lvl: int
        the sbml level, 2 or 3 (a false value means 3)
    mnx_chem_prop: str or None
        passed to parse_mnx_chem_prop, used only if mnx_chem_xref is also given
    mnx_chem_xref: str or None
        passed to parse_mnx_chem_xref, used only if mnx_chem_prop is also given
    verbose: bool
        print informations

    Raises
    ------
    ValueError
        if sbml_lvl is not 2 or 3
    """
    global all_ga
    if isinstance(padmet, str):
        padmet = PadmetSpec(padmet)

    if not model_id:
        model_id = os.path.splitext(os.path.basename(output))[0]
    sbml_lvl = int(sbml_lvl) if sbml_lvl else 3
    #fail early: without this check an unsupported level crashed later on an
    #undefined model
    if sbml_lvl not in (2, 3):
        raise ValueError("sbml_lvl must be 2 or 3, got %s" % sbml_lvl)

    #NOTE(review): add_ga is defined elsewhere, presumably it fills all_ga - confirm
    all_ga = []
    with_mnx = False
    if mnx_chem_prop and mnx_chem_xref:
        with_mnx = True
        dict_mnx_chem_xref = parse_mnx_chem_xref(mnx_chem_xref)
        dict_mnx_chem_prop = parse_mnx_chem_prop(mnx_chem_prop)

    #create an empty sbml model
    if sbml_lvl == 2:
        sbmlns = libsbml.SBMLNamespaces(2,1)
        document = libsbml.SBMLDocument(sbmlns)
        model = document.createModel()
        association = None
        # Create a unit definition
        mmol_per_gDW_per_hr = model.createUnitDefinition()
        check(mmol_per_gDW_per_hr, 'create unit definition')
        check(mmol_per_gDW_per_hr.setId('mmol_per_gDW_per_hr'), 'set unit definition id')

        unit = mmol_per_gDW_per_hr.createUnit()
        check(unit, 'create mole unit')
        check(unit.setKind(libsbml.UNIT_KIND_MOLE), 'set unit kind')
        check(unit.setScale(-3), 'set unit scale')
        check(unit.setMultiplier(1), 'set unit multiplier')
        check(unit.setOffset(0), 'set unit offset')

        unit = mmol_per_gDW_per_hr.createUnit()
        check(unit, 'create gram unit')
        check(unit.setKind(libsbml.UNIT_KIND_GRAM), 'set unit kind')
        check(unit.setExponent(-1), 'set unit exponent')
        check(unit.setMultiplier(1), 'set unit multiplier')
        check(unit.setOffset(0), 'set unit offset')

        unit = mmol_per_gDW_per_hr.createUnit()
        check(unit, 'create second unit')
        check(unit.setKind(libsbml.UNIT_KIND_SECOND), 'set unit kind')
        check(unit.setExponent(-1), 'set unit exponent')
        check(unit.setMultiplier(0.00027777), 'set unit multiplier')
        check(unit.setOffset(0), 'set unit offset')
    else:
        sbmlns = libsbml.SBMLNamespaces(3,1,"fbc",1)
        document = libsbml.SBMLDocument(sbmlns)
        document.setPackageRequired("fbc", False)
        model = document.createModel()
        mplugin = model.getPlugin("fbc")
        #xml fragments of the model annotation, completed at the end
        association = ['<annotation>', 
        '<listOfGeneAssociations xmlns="http://www.sbml.org/sbml/level3/version1/fbc/version1">']
        check(model,                              'create model')
        check(model.setTimeUnits("second"),       'set model-wide time units')
        check(model.setExtentUnits("mole"),       'set model units of extent')
        check(model.setSubstanceUnits('mole'),    'set model substance units')
    model.setId(model_id)
    math_ast = libsbml.parseL3Formula('FLUX_VALUE')
    check(math_ast, 'create AST for rate expression')

    #list of tuple: (x,y) x=species id,y=value of compart, if not defined=None
    species = [(rlt.id_out, rlt.misc.get("COMPARTMENT",[None])[0]) for rlt in padmet.getAllRelation() 
    if rlt.type in ["consumes","produces"]]
    if verbose: print("%s species" %len(species))
    #compart_dict: k = id_encoded, v = original id
    compart_dict = {}
    #species_dict: k = species_id_encoded, v = dict: k' = {species_id, compart, name}, v' = value or None 
    species_dict = {}
    for species_id, compart in species:
        #encode id for sbml
        species_id_encoded = sp.convert_to_coded_id(species_id, "M", compart)
        #try to get the common_name, if no value use the species id
        name = padmet.dicOfNode[species_id].misc.get("COMMON-NAME",[species_id])[0]
        species_dict[species_id_encoded] = {"species_id":species_id, "compart":compart, "name":name}

    for species_id_encoded, s_dict in species_dict.items():
        compart = s_dict["compart"]
        name = s_dict["name"]
        original_id = s_dict["species_id"]
        s = model.createSpecies()
        check(s, 'create species')
        check(s.setId(species_id_encoded), 'set species id %s' %species_id_encoded)
        check(s.setMetaId(species_id_encoded), 'set species meta id %s' %species_id_encoded)
        check(s.setBoundaryCondition(False), 'set boundaryCondition to False')
        check(s.setHasOnlySubstanceUnits(False), 'set setHasOnlySubstanceUnits to False')
        check(s.setConstant(False), 'set setConstant to False')
        check(s.setInitialAmount(0.0), 'set initAmount')
        #fall back on the id of this species when no name is available
        species_name = name if name is not None else original_id
        check(s.setName(species_name), 'set species Name %s' %species_name)
        if compart is not None:
            compart_encoded = sp.convert_to_coded_id(compart)
            compart_dict[compart_encoded] = compart
            check(s.setCompartment(compart_encoded), 'set species compartment %s' %compart_encoded)
            if compart == BOUNDARY_ID:
                check(s.setBoundaryCondition(True), 'set boundaryCondition to True')
        if with_mnx:
            try:
                mnx_id = dict_mnx_chem_xref[original_id]
                species_prop = dict(dict_mnx_chem_prop[mnx_id])
            except (IndexError, KeyError):
                species_prop = None
            if species_prop:
                #drop the properties without value
                species_prop = {k: v for k, v in species_prop.items() if v and v != "NA"}
                try:
                    charge = int(species_prop["charge"])
                except (ValueError, KeyError):
                    charge = 0
                formula = species_prop.get("formula","")
                #formula containing '(', ')' or '.' are not written
                if re.search(r"\(|\)|\.", formula): formula = None
                inchi = species_prop.get("inchi", None)
                if sbml_lvl == 3:
                    splugin = s.getPlugin("fbc")
                    check(splugin.setCharge(charge), 'set charge')
                    if formula:
                        check(splugin.setChemicalFormula(formula), 'set Formula')
                    if inchi:
                        annot_xml = create_annotation(inchi, species_id_encoded)
                        check(s.setAnnotation(annot_xml), 'set Annotations')
                    #these properties are not repeated in the notes
                    species_prop = {
                        prop: prop_v for prop, prop_v in species_prop.items()
                        if prop not in ["charge", "formula", "source", "description","inchi"]
                        and prop_v not in ["NA",""]}
                notes = create_note(species_prop)
                check(s.setNotes(notes), 'set Notes')

    for k, v in compart_dict.items():
        compart = model.createCompartment()
        check(compart,'create compartment')
        check(compart.setId(k),'set compartment id %s' %k)
        check(compart.setSize(1),'set size for compartment id %s' %k)
        check(compart.setConstant(True),'set constant for compartment id %s' %k)

        if v == "c":
            check(compart.setName("cytosol"),'set compartment name cytosol')
        elif v == "e":
            check(compart.setName("extracellular"),'set compartment name extracellular')
        elif v == "p":
            check(compart.setName("periplasm"),'set compartment name periplasm')
        elif v != k:
            check(compart.setName(v),'set compartment id %s' %v)

    if obj_fct is not None:
        obj_fct_encoded = sp.convert_to_coded_id(obj_fct)
        if verbose: print("the objectif reaction is: %s" %(obj_fct_encoded))
    reactions = [node for node in padmet.dicOfNode.values() if node.type == "reaction"]
    nb_reactions = str(len(reactions))
    # Create reactions
    if verbose: print("%s reactions" %nb_reactions)
    for rNode in reactions:
        rId = rNode.id
        rId_encoded = sp.convert_to_coded_id(rId,"R")
        rName = rNode.misc.get("COMMON-NAME",[rId])[0]
        #all relations of the reaction, empty if the reaction has none
        relations = padmet.dicOfRelationIn.get(rId, [])
        reaction = model.createReaction()
        check(reaction, 'create reaction')
        check(reaction.setId(rId_encoded), 'set reaction id %s' %rId_encoded)
        if rName is not None:
            check(reaction.setName(rName), 'set reaction name %s' %rName)
        check(reaction.setFast(False), 'set fast')

        #set reversibility
        direction = rNode.misc["DIRECTION"][0]
        reversible = direction != "LEFT-TO-RIGHT"
        check(reaction.setReversible(reversible), 'set reaction reversibility flag %s' %reversible)
        if sbml_lvl == 3:
            bound = mplugin.createFluxBound()
            bound.setReaction(rId_encoded)
            bound.setOperation("lessEqual")
            bound.setValue(def_max_upper_bound)

            bound = mplugin.createFluxBound()
            bound.setReaction(rId_encoded)
            bound.setOperation("greaterEqual")
            bound.setValue(def_max_lower_bound if reversible else 0)
            if rId == obj_fct:
                objective = mplugin.createObjective()
                objective.setId("obj1")
                objective.setType("maximize")
                mplugin.setActiveObjectiveId("obj1")
                fluxObjective = objective.createFluxObjective()
                fluxObjective.setReaction(rId_encoded)
                fluxObjective.setCoefficient(1)
        elif sbml_lvl == 2:
            kinetic_law = reaction.createKineticLaw()
            check(kinetic_law, 'create kinetic law')
            check(kinetic_law.setMath(math_ast), 'set math on kinetic law')
            #add parameter flux_value
            flux_value_k = kinetic_law.createParameter()
            check(flux_value_k, 'create parameter flux_value_k')
            check(flux_value_k.setId('FLUX_VALUE'), 'set parameter flux_value_k id')
            check(flux_value_k.setValue(0), 'set parameter flux_value_k value')
            check(flux_value_k.setUnits('mmol_per_gDW_per_hr'), 'set parameter flux_value_k units')
            #add parameter upper/lower_bound, lower value depend on reversibility
            upper_bound_k = kinetic_law.createParameter()
            check(upper_bound_k, 'create parameter upper_bound_k')
            check(upper_bound_k.setId('UPPER_BOUND'), 'set parameter upper_bound_k')
            check(upper_bound_k.setValue(def_max_upper_bound),'set parameter upper_bounp_k value')
            check(upper_bound_k.setUnits('mmol_per_gDW_per_hr'), 'set parameter uppper_bound_k units')

            lower_bound_k = kinetic_law.createParameter()
            check(lower_bound_k, 'create parameter lower_bound_k')
            check(lower_bound_k.setId('LOWER_BOUND'), 'set parameter lower_bound_k id')
            check(lower_bound_k.setValue(def_max_lower_bound if reversible else 0), 'set parameter lower_bound_k value')
            check(lower_bound_k.setUnits('mmol_per_gDW_per_hr'), 'set parameter lower_bound_k units')
            #objective_coefficient: 1 for the objective reaction, else 0
            obj_fct_k = kinetic_law.createParameter()
            check(obj_fct_k, 'create parameter obj_fct_k')
            check(obj_fct_k.setId('OBJECTIVE_COEFFICIENT'), 'set parameter obj_fct_k id')
            check(obj_fct_k.setValue(1 if rId == obj_fct else 0), 'set parameter obj_fct_k value')

        #reactants first, then products
        for rlt_type, create_species_ref, role in (("consumes", reaction.createReactant, "reactant"),
                                                   ("produces", reaction.createProduct, "product")):
            for rlt in relations:
                if rlt.type != rlt_type:
                    continue
                compart = rlt.misc.get("COMPARTMENT",[None])[0]
                cpd_id_encoded = sp.convert_to_coded_id(rlt.id_out,"M",compart)
                try:
                    stoich = float(rlt.misc["STOICHIOMETRY"][0])
                #for case stoich = n
                except ValueError:
                    stoich = float(1)
                species_ref = create_species_ref()
                check(species_ref, 'create %s' %role)
                check(species_ref.setSpecies(cpd_id_encoded), 'assign %s species %s' %(role, cpd_id_encoded))
                check(species_ref.setStoichiometry(stoich), 'set stoichiometry %s' %stoich)
                if sbml_lvl == 3: check(species_ref.setConstant(False), 'set constant %s' %False)

        linked_genes = set([rlt.id_out for rlt in relations if rlt.type == "is_linked_to"])
        all_suppData = [padmet.dicOfNode[rlt.id_out] for rlt in relations if rlt.type == "has_suppData"]
        #for each suppData with GENE_ASSOCIATION in misc: convert the gene assoc
        #to subsets of genes with compile_input and add the new subsets to all_ga_subsets
        #a gene assoc without operator or with 100 operators or more is skipped
        #then each linked gene not found in any subset is added as its own subset
        all_ga_subsets = list()
        for suppData in all_suppData:
            try:
                original_ga = suppData.misc["GENE_ASSOCIATION"][0]
                ga_for_gbr = re.sub(r" or " , "|", original_ga)
                ga_for_gbr = re.sub(r" and " , "&", ga_for_gbr)
                ga_for_gbr = re.sub(r"\s" , "", ga_for_gbr)
                nb_operators = len(re.findall(r"\||&", ga_for_gbr))
                if 0 < nb_operators < 100:
                    for ga in compile_input(ga_for_gbr):
                        ga = set(ga)
                        if ga not in all_ga_subsets:
                            all_ga_subsets.append(ga)
            except KeyError:
                #suppData without GENE_ASSOCIATION
                pass
        for gene_id in linked_genes:
            if not any(gene_id in ga for ga in all_ga_subsets):
                all_ga_subsets.append([gene_id])

        #association is only defined for level 3
        if association and all_ga_subsets:
            add_ga(rId_encoded, all_ga_subsets)
        #set notes
        notes_dict = {}
        if linked_genes:
            notes_dict["GENE_ASSOCIATION"] = " or ".join(["("+" and ".join(g)+")" for g in all_ga_subsets])

        try:
            categories = set([padmet.dicOfNode[rlt.id_out].misc["CATEGORY"][0] for rlt in relations if rlt.type == "has_reconstructionData"])
        except KeyError:
            categories = None
        if categories:
            notes_dict["CATEGORIES"] = " and ".join(categories)
        pathways = set([rlt.id_out for rlt in relations if rlt.type == "is_in_pathway"])
        if pathways:
            notes_dict["SUBSYSTEM"] = " , ".join(pathways)
        if notes_dict:
            notes = create_note(notes_dict)
            check(reaction.setNotes(notes), 'set notes %s' %notes)

    if all_ga:
        for ga in all_ga:
            association.extend(ga)
        association.extend(['</listOfGeneAssociations>', '</annotation>'])
        association = " ".join(association)
        model.setAnnotation(association)
    if verbose: print("Done, creating sbml file: %s" %output)
    libsbml.writeSBMLToFile(document, output)
Example #4
0
def test_gbr():
    """Check the subsets returned by gbr.compile_input for 'a&(b|c)&(d|e)'."""
    observed = list(gbr.compile_input('a&(b|c)&(d|e)'))
    wanted = [
        ('a', 'b', 'd'),
        ('a', 'b', 'e'),
        ('a', 'c', 'd'),
        ('a', 'c', 'e'),
    ]

    assert observed == wanted