def load_data(data, scores, orthmap="", fc=2, mfc=1):
    """Load elution files and initialise every score with each of them.

    Args:
        data: Either a list/tuple of elution file paths, or the path of a
            directory; in the latter case the files directly inside that
            directory (no recursion) are loaded.
        scores: Collection of objects exposing ``init(elution_data)``. It is
            iterated once per loaded file, so it must be re-iterable.
        orthmap: Table passed to ``GS.Inparanoid.readTable`` and used to
            map every loaded data set. ``""`` or ``False`` disables the
            mapping step.
        fc: Forwarded to ``CS.ElutionData`` as ``frac_count``.
        mfc: Forwarded to ``CS.ElutionData`` as ``max_frac_count``.

    Returns:
        A tuple ``(proteins, elution_datas)``: the union of the
        ``prot2Index`` keys of all loaded data sets, and the list of the
        loaded ``CS.ElutionData`` objects in input order.

    Raises:
        StopIteration: If ``data`` is not a list/tuple and ``os.walk``
            yields nothing for it (e.g. the directory does not exist).
    """
    # isinstance instead of an exact type check, so tuples and list
    # subclasses of paths are accepted rather than being walked as a directory.
    if isinstance(data, (list, tuple)):
        paths = data
    else:
        # The first tuple yielded by os.walk describes `data` itself; its
        # third item is the list of plain file names in that directory.
        paths = [os.path.join(data, fn) for fn in next(os.walk(data))[2]]

    elution_datas = []
    elution_prots = set()
    for elution_file in paths:
        # Skip hidden files such as ".DS_Store".
        if elution_file.rsplit(os.sep, 1)[-1].startswith("."):
            continue
        # Drop trailing whitespace/newlines (paths may come from a text file).
        elution_file = elution_file.rstrip()
        elution_data = CS.ElutionData(
            elution_file, frac_count=fc, max_frac_count=mfc
        )
        # Same condition as `orthmap != "" and orthmap != False`.
        if orthmap not in ("", False):
            # NOTE(review): the mapper is rebuilt for every file; it could be
            # hoisted out of the loop if `orthmap()` does not mutate it.
            mapper = GS.Inparanoid("", inparanoid_cutoff=1)
            mapper.readTable(orthmap, direction=0)
            elution_data.orthmap(mapper)
        elution_datas.append(elution_data)
        elution_prots.update(elution_data.prot2Index.keys())
        for score in scores:
            score.init(elution_data)
    return elution_prots, elution_datas
def create_goldstandard(clusters, target_taxid, valprots):
    """Create the "Goldstandard" reference object from the given clusters.

    Args:
        clusters: Complex clusters handed to ``make_reference_data``.
        target_taxid: Taxonomy id of the target species. For any value other
            than ``"9606"`` or ``""`` a ``GS.Inparanoid`` mapping is built
            for it; otherwise an empty string is passed in its place.
            (Presumably the reference complexes are already for taxid 9606,
            so no mapping is needed -- confirm.)
        valprots: Forwarded to ``make_reference_data`` as ``found_prots``.

    Returns:
        The ``GS.Goldstandard_from_Complexes`` instance after
        ``make_reference_data`` has been called on it.
    """
    skip_mapping = target_taxid in ("9606", "")
    ortholog_mapper = "" if skip_mapping else GS.Inparanoid(taxid=target_taxid)

    reference = GS.Goldstandard_from_Complexes("Goldstandard")
    reference.make_reference_data(
        clusters,
        ortholog_mapper,
        found_prots=valprots,
    )
    return reference
def orth_map(args):
    """Map a cluster file with Inparanoid and write the result to a file.

    Args:
        args: A three-item sequence ``(clusterF, taxid, outF)``:
            ``clusterF`` is read via ``GS.Clusters.read_file``, ``taxid`` is
            used to build the ``GS.Inparanoid`` mapping, and ``outF`` is the
            path of the output file (overwritten if it exists).

    Returns:
        None. The mapped, merged and filtered clusters are written to
        ``outF`` as produced by ``clust.to_string()``.
    """
    clusterF, taxid, outF = args

    clust = GS.Clusters(False)
    clust.read_file(clusterF)

    orthmap = GS.Inparanoid(taxid=taxid)
    orthmap.mapComplexes(clust)
    clust.merge_complexes()
    clust.filter_complexes()

    # The context manager closes the handle even if serialising or writing
    # raises; the original open()/close() pair leaked it in that case.
    with open(outF, "w") as outFH:
        outFH.write(clust.to_string())