#!python
import sys
import os
import os.path

from Hotpep.hotpep_data import hotpep_data_path

# Defaults used when the script is run without arguments.
protein_collection = hotpep_data_path("fungus_fungus/Chaetomium_thermophilum")
cazyme_array = ["AAexp"]

# Optional positional overrides:
#   argv[1] - protein collection directory; every "?" is turned into a space
#   argv[2] - underscore-separated list of class names to process
if len(sys.argv) >= 2:
    protein_collection = sys.argv[1].replace("?", " ")
    if len(sys.argv) >= 3:
        cazyme_array = sys.argv[2].split("_")

assension_family_hash = {}

for cazy_class in cazyme_array:
    if os.path.exists(protein_collection + "/" + cazy_class):
        for filename in os.listdir(protein_collection + "/" + cazy_class):
            if "summary" not in filename and filename.endswith(".txt"):
                fam = filename[:-4]
                array = open(
                    protein_collection + "/" + cazy_class + "/" + filename,
                    'r').readlines()
                array = array[1:]
                for hit in array:
                    arr = hit.split('/t')
                    if len(arr) > 1:
                        if arr[1] not in assension_family_hash:
                            assension_family_hash[arr[1]] = []
                        assension_family_hash[arr[1]].append(fam + "_" +
#Start Add by Le Huang 12/24/2018
# Delete Results/output.txt of the first organism directory if it exists.
# os.remove avoids spawning an external `rm` process and closes the gap
# between checking for the file and deleting it.
try:
    os.remove(organism_array[0] + '/Results/output.txt')
except FileNotFoundError:
    pass  # nothing to delete
except OSError as err:
    # Best effort, as before: report the failure and carry on.
    print("Could not remove " + organism_array[0] + '/Results/output.txt' +
          ": " + str(err))
#End Add by Le Huang 12/24/2018

##Start Delete by Le Huang
#try:
# 	call(['rm', organism_array[0]+'/Results/output.txt'])
# except:
# 	pass
## End Delete by Le Huang
# For every organism directory and every class in cazyme_array, run the
# external screening script and then delete the per-thread files it leaves
# in the organism directory.
for protein_dir_name in organism_array:
    print("Screening " + protein_dir_name + " for")
    for cazy_class in cazyme_array:
        print(cazy_class)
        peptide_dir_name = hotpep_data_path("CAZY_PPR_patterns", cazy_class)
        variables = [
            threads, protein_dir_name, peptide_dir_name, peptide_length,
            hit_cut_off, freq_cut_off
        ]
        # NOTE(review): the command is a space-joined string run through the
        # shell, so any value containing whitespace is split into several
        # arguments - confirm callers never pass such paths unescaped.
        call("parallel_group_many_proteins_many_patterns_noDNA.py " +
             " ".join(str(x) for x in variables),
             shell=True)
        #call(["add_functions_orf.py", protein_dir_name, peptide_dir_name])
        # Remove thread1.txt .. thread<threads>.txt (assumes threads is an
        # int). A missing file is fine, so OSError is ignored; unlike a bare
        # `except`, this does not swallow KeyboardInterrupt or SystemExit.
        for thread_number in range(1, threads + 1):
            try:
                os.remove(protein_dir_name + "/thread" + str(thread_number) +
                          ".txt")
            except OSError:
                pass
# Example #3
# 0
#!/usr/bin/env python3
import sys
import os
import os.path
from subprocess import call
import natsort

from Hotpep.hotpep_data import hotpep_data_path

# Built-in defaults, used when no command-line override is given.
protein_dir_name = hotpep_data_path("fungus_fungus")
peptide_dir_name = hotpep_data_path("CAZY_PPR_patterns/GH")

# Normally 2 for new patterns (no proteins with this function in group) and
# 10 for old patterns (sum freq conserved peptides for function in group).
function_significance_limit = 2
# Appended to the family name (before ".txt") to form the score file name.
score_file = "_group_ec"

# Optional positional overrides; every "?" in an argument becomes a space:
#   argv[1] - protein directory
#   argv[2] - peptide pattern directory
if len(sys.argv) >= 2:
    protein_dir_name = sys.argv[1].replace("?", " ")
    if len(sys.argv) >= 3:
        peptide_dir_name = sys.argv[2].replace("?", " ")

fam_group_score_hash = {}
with open(peptide_dir_name + "/fam_list.txt", 'r') as f:  ####
    for line in f:
        fam = line.rstrip()
        try:
            with open(
                    peptide_dir_name + "/" + fam + "/" + fam + score_file +
                    ".txt", 'r') as f2:
                group_func_hash = {}
                for row in f2:
                    row = row.rstrip()
                    arr = row.split('\t')