Exemple #1
0
def main():
    """Filter the ICW_rpoB FASTA file using per-read-ID occurrence counts.

    Steps:
      1. Scan the input FASTA for header lines that match the M00704 run
         prefix and collect the captured read IDs.
      2. Count how often each ID occurs (pandas ``value_counts``) and write
         the table to ``read_counts.csv``.
      3. Read the counts table back and, for every ID that occurs exactly
         twice, build ``<id>_2:N:0:5``.
      4. Pass the FASTA handle, that list and the output handle to
         ``Subsampling.exclude`` (defined elsewhere; presumably it writes
         every record whose ID is not in the list -- confirm against the
         class).

    Prints ``no such file!`` and returns without doing anything else when
    the input FASTA cannot be opened.
    """
    fasta_path = "/Users/Xin/Desktop/IC_project/output/rpoB_output/ICW_rpoB.fasta"
    counts_out = "read_counts.csv"
    # NOTE(review): the counts are written relative to the current working
    # directory but read back from this absolute path; the two only agree
    # when the script is run from the rpoB_output directory -- confirm.
    counts_in = "/Users/Xin/Desktop/IC_project/output/rpoB_output/read_counts.csv"
    output_path = "ICW_rpoB_nopaired.fasta"
    reads_tag = '_2:N:0:5'
    # Raw string: the pattern previously relied on invalid string escapes.
    header_re = re.compile(r"^>(M00704:49:000000000-AFW6D[\d:]+)")

    try:
        fasta = open(fasta_path, 'r')
    except IOError:
        print("no such file!")
        # Nothing can be done without the input; stop instead of failing
        # later on an undefined file handle.
        return
    print("Open success!")

    with fasta:
        readslist = [
            match.group(1)
            for match in map(header_re.search, fasta)
            if match
        ]
    print("Number of the reads in the list is ", len(readslist))

    df_reads = pd.DataFrame(readslist, columns=['read'])
    read_counts = df_reads['read'].value_counts()
    # header=False keeps the file header-less, which is what the
    # DictReader below (explicit fieldnames) expects; otherwise the header
    # row would be parsed as data and int('count') would raise.
    read_counts.to_csv(counts_out, header=False)

    readslist2 = []
    with open(counts_in, 'r') as csvfile:
        rows = csv.DictReader(csvfile, delimiter=",", fieldnames=['read', 'count'])
        for row in rows:
            if int(row['count']) == 2:
                readslist2.append(row['read'] + reads_tag)

    # Context managers guarantee both handles are closed (and the output
    # flushed) even if the filtering step raises.
    with open(fasta_path, 'r') as f, open(output_path, 'w') as output:
        subset = Subsampling()
        subset.exclude(f, readslist2, output)
def main():
    """Interactively filter a reads file against a list of read IDs.

    Prompts for three paths: the reads collection, the filter CSV and the
    output file. The first column of the filter CSV (read as ``reads``; the
    second column is read as ``genus``) is collected into a list, which is
    handed together with the reads handle and the output handle to
    ``Subsampling.exclude`` (defined elsewhere).

    Prints ``no such file!`` and returns when the reads collection cannot
    be opened. The output file is only created once the inputs have been
    read, so a failed run no longer leaves an empty output behind.
    """
    input1dir = input('Reads collections: ')
    input2dir = input('Reads filter: ')
    outputdir = input('Output file: ')

    try:
        all_reads = open(input1dir, 'r')
    except IOError:
        print("no such file!")
        # Stop here instead of failing later on an undefined handle.
        return

    with all_reads:
        with open(input2dir, 'r') as inputfile:
            rows = csv.DictReader(inputfile, delimiter=",", fieldnames=['reads', 'genus'])
            readslist = [row['reads'] for row in rows]  # all the reads
        print("total reads in filter list:", len(readslist))

        # The context manager closes (and flushes) the output file even if
        # the filtering step raises.
        with open(outputdir, 'w') as output:
            subset = Subsampling()
            subset.exclude(all_reads, readslist, output)
def main():
    """Select the unmapped reads whose taxon appears in the genus table.

    Reads two header-less CSV tables with pandas:
      * the genus count table (columns named ``taxa``, ``count``), and
      * the read-to-taxon table (columns named ``num``, ``read``, ``id``,
        ``taxa``).

    Every ``read`` whose ``taxa`` value occurs in the genus table is
    collected, and the list is passed together with the unmapped-reads
    FASTA handle and the output handle to ``Subsampling.include`` (defined
    elsewhere).
    """
    inputfiledir1 = "/Users/Xin/Desktop/IC_project/output/Jan222016/resource_tables/ICC_DS2_2_unmapped_genus_count.csv"
    inputfiledir2 = "/Users/Xin/Desktop/IC_project/output/Jan222016/resource_tables/ICC_DS2_2_unmapped_taxa.csv"
    inputfiledir3 = "/Users/Xin/Desktop/IC_project/output/ICC_DS2_CLC_mapping_output/ICC_DS2_2_CLC_unmapped.fa"
    outputfiledir = "/Users/Xin/Desktop/IC_project/output/Feb032016/ICC_DS2_2_unmapped_genus.fa"

    genus = pd.read_csv(inputfiledir1, names=["taxa", "count"], header=None)
    read_taxa = pd.read_csv(inputfiledir2, names=["num", "read", "id", "taxa"], header=None)

    # A set gives constant-time membership tests; the original list lookup
    # made the selection quadratic in the table sizes.
    wanted_taxa = set(genus["taxa"])
    readlist = [
        read
        for taxon, read in zip(read_taxa["taxa"], read_taxa["read"])
        if taxon in wanted_taxa
    ]

    # Files are opened only after the tables were read successfully, and
    # the context manager closes both handles even if include() raises.
    with open(inputfiledir3, 'r') as seq, open(outputfiledir, 'w') as outputfile:
        subset = Subsampling()
        subset.include(seq, readlist, outputfile)
def main(argv):
    """Filter a reads file against a list of read IDs given on the command line.

    Args:
        argv: Argument vector in ``sys.argv`` layout: ``argv[0]`` is
            ignored, ``argv[1]`` is the reads collection, ``argv[2]`` the
            filter CSV and ``argv[3]`` the output file.

    The first column of the filter CSV (read as ``reads``; the second
    column is read as ``genus``) is collected into a list, which is handed
    together with the reads handle and the output handle to
    ``Subsampling.exclude`` (defined elsewhere).

    Returns ``None`` in every case. When the argument count is wrong or the
    reads collection cannot be opened, a message is printed and the
    function returns without creating the output file.
    """
    if len(argv[1:]) != 3:
        print("Three arguements are needed!!")
        # Without all three paths there is nothing to do; returning avoids
        # the UnboundLocalError the fall-through used to trigger.
        return
    input1dir, input2dir, outputdir = argv[1:]

    try:
        all_reads = open(input1dir, 'r')
    except IOError:
        print("no such file!")
        return

    with all_reads:
        with open(input2dir, 'r') as inputfile:
            rows = csv.DictReader(inputfile, delimiter=",", fieldnames=['reads', 'genus'])
            readslist = [row['reads'] for row in rows]  # all the reads
        print("total reads in filter list:", len(readslist))

        # The context manager closes (and flushes) the output file even if
        # the filtering step raises.
        with open(outputdir, 'w') as output:
            subset = Subsampling()
            subset.exclude(all_reads, readslist, output)