#!python import sys import os import os.path from Hotpep.hotpep_data import hotpep_data_path protein_collection = hotpep_data_path("fungus_fungus/Chaetomium_thermophilum") if len(sys.argv) > 1: protein_collection = sys.argv[1].replace("?", " ") cazyme_array = ["AAexp"] if len(sys.argv) > 2: cazyme_array = sys.argv[2].split("_") assension_family_hash = {} for cazy_class in cazyme_array: if os.path.exists(protein_collection + "/" + cazy_class): for filename in os.listdir(protein_collection + "/" + cazy_class): if "summary" not in filename and filename.endswith(".txt"): fam = filename[:-4] array = open( protein_collection + "/" + cazy_class + "/" + filename, 'r').readlines() array = array[1:] for hit in array: arr = hit.split('/t') if len(arr) > 1: if arr[1] not in assension_family_hash: assension_family_hash[arr[1]] = [] assension_family_hash[arr[1]].append(fam + "_" +
def _remove_if_present(path):
    """Delete the file at *path*, tolerating its absence.

    A missing file is ignored silently. Any other OSError (for example a
    permission problem) is reported on stdout instead of being swallowed,
    and execution continues, so cleanup stays best-effort.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as err:
        print("Could not remove " + path + ": " + str(err))


# Added by Le Huang 12/24/2018: remove any existing Results/output.txt under
# the first organism directory before screening starts. os.remove is used
# instead of spawning an external `rm`, so no separate process is needed and
# the step does not depend on an `rm` executable being available.
_remove_if_present(organism_array[0] + '/Results/output.txt')

for protein_dir_name in organism_array:
    print("Screening " + protein_dir_name + " for")
    for cazy_class in cazyme_array:
        print(cazy_class)
        peptide_dir_name = hotpep_data_path("CAZY_PPR_patterns", cazy_class)
        variables = [
            threads, protein_dir_name, peptide_dir_name, peptide_length,
            hit_cut_off, freq_cut_off
        ]
        # NOTE(review): the command line is assembled by string concatenation
        # and executed through the shell, so any shell metacharacters in these
        # values are interpreted by the shell. Left unchanged because the
        # called script's argument handling is not visible here.
        call("parallel_group_many_proteins_many_patterns_noDNA.py " +
             " ".join(str(x) for x in variables),
             shell=True)
        # Disabled in the original script:
        # call(["add_functions_orf.py", protein_dir_name, peptide_dir_name])

        # Best-effort cleanup of the per-thread files thread1.txt ..
        # thread<threads>.txt in the organism directory.
        # NOTE(review): nesting was reconstructed from a whitespace-collapsed
        # source; this cleanup is assumed to run after every pattern class
        # rather than once per organism -- confirm against the original file.
        for thread_number in range(1, threads + 1):
            _remove_if_present(protein_dir_name + "/thread" +
                               str(thread_number) + ".txt")
#!/usr/bin/env python3 import sys import os import os.path from subprocess import call import natsort from Hotpep.hotpep_data import hotpep_data_path protein_dir_name = hotpep_data_path("fungus_fungus") if len(sys.argv) > 1: protein_dir_name = sys.argv[1].replace("?", " ") function_significance_limit = 2 #Normally 2 for new patterns (no proteins with this function in group) and 10 for old patterns (sum freq conserved peptides for function in group ) score_file = "_group_ec" peptide_dir_name = hotpep_data_path("CAZY_PPR_patterns/GH") if len(sys.argv) > 2: peptide_dir_name = sys.argv[2].replace("?", " ") fam_group_score_hash = {} with open(peptide_dir_name + "/fam_list.txt", 'r') as f: #### for line in f: fam = line.rstrip() try: with open( peptide_dir_name + "/" + fam + "/" + fam + score_file + ".txt", 'r') as f2: group_func_hash = {} for row in f2: row = row.rstrip() arr = row.split('\t')