Example #1
0
def mask_file(infile, outfile, type, case):
    """Mask every sequence of a FASTA file and write the result to a new file.

    infile  -- path of the FASTA file to read.
    outfile -- path of the file to write (opened with mode 'w', so an
               existing file is overwritten).
    type    -- masking method: "longAT" selects mask_longAT, "dust"
               selects mask_dust.
    case    -- passed through unchanged as the second argument of the
               selected masking function.

    Raises ValueError if type is not one of the supported methods.
    """
    # Pick the masker before touching any file: an unsupported type used to
    # surface as an unbound local (or silently reuse the previous result),
    # and only after the input had already been parsed.
    if type == "longAT":
        masker = mask_longAT
    elif type == "dust":
        masker = mask_dust
    else:
        raise ValueError(
            "unknown mask type %r (expected 'longAT' or 'dust')" % (type,))

    fin = open(infile, 'r')
    try:
        seq_dic = fasta.fasta_dic(fin)
    finally:
        fin.close()

    # Only values are replaced, so the key set stays stable while looping.
    for seq_id in seq_dic.keys():
        seq_dic[seq_id] = masker(seq_dic[seq_id], case)

    fout = open(outfile, 'w')
    try:
        fasta.generate_fasta(seq_dic, fout)
    finally:
        # Closing explicitly guarantees the output is flushed to disk.
        fout.close()
Example #2
0
def Generate_Group(fastafile, outfile): #fastafile: ptr or filename
    """Write one group record per FASTA entry to outfile, then close outfile.

    fastafile -- either a filename or an already-open file object.  A
                 value that can be concatenated with a string is treated
                 as a filename and opened for reading here.
    outfile   -- writable file object; it is closed before returning.

    For every id returned by fasta.fasta_dic three lines are written:

        [<id>]
        <id>
        + 1 1 1 1
    """
    import fasta

    # Probe for "string-like" separately from the open() call so that an
    # error raised by open() itself is no longer swallowed.
    try:
        fastafile + 'a'
    except TypeError:
        opened_here = False
    else:
        fastafile = open(fastafile, 'r')
        opened_here = True

    try:
        id_seq_dic = fasta.fasta_dic(fastafile)
    finally:
        # Only close what this function opened; a handle supplied by the
        # caller stays under the caller's control.
        if opened_here:
            fastafile.close()

    for seq_id in id_seq_dic.keys():
        outfile.write('[%s]\n' % (seq_id))
        outfile.write(seq_id + '\n')
        outfile.write('%s %s %s %s %s\n' % ('+', '1', '1', '1', '1'))
    outfile.close()
# Command-line entry point.  Exactly five arguments are required (named in
# the usage string below); with any other count only the usage is printed.
if len(sys.argv[:]) != 6:
    print "USAGE: python Pick70_genome_itself.py inputfasta genomefasta groupfile(fout) version(BLAST/BLAT/GFCLIENT) masklower(yes, no)"
else:
    print "Identifying the input sequences' genomic target ... "

    input = sys.argv[1]
    genome = sys.argv[2]
    # NOTE(review): fout and finput (below) are not closed anywhere in the
    # visible part of this script.
    fout =open( sys.argv[3],'w')
    # The version name is upper-cased so the comparison below ignores case.
    VERSION = string.upper(sys.argv[4])
    # Export both paths as environment variables -- presumably for shell
    # commands launched later; none is visible in this fragment.
    os.environ['genome'] = genome
    os.environ['input'] = input
    gfdir =""

    # Unless the fifth argument is "yes", convert every sequence to upper case.
    finput = open(input,'r')
    seq_dic= fasta.fasta_dic(finput)
    index= seq_dic.keys()
    if sys.argv[5] !="yes":
        for id in index:
            seq_dic[id] = string.upper(seq_dic[id])
    # Write the (possibly upper-cased) sequences to a temporary file in the
    # working directory: a '>' + id header line followed by the sequence.
    tempinput = '~ftempinput'
    ftempinput =open(tempinput,'w')
    for id in index:
        ftempinput.write('>'+id+'\n')
        ftempinput.write(seq_dic[id]+'\n')
    ftempinput.close()

    #blast version
    if (VERSION =="BLAST"):
        #generate the blast db if the db files do not exist
        # NOTE(review): the condition below is true when the .nsq, .nin and
        # .nhr files are all readable; its body is missing from this view, so
        # what happens in either case cannot be confirmed here.
        if (os.access(genome+".nsq", os.R_OK)) and (os.access(genome+".nin", os.R_OK)) and (os.access(genome+".nhr", os.R_OK)):
    # NOTE(review): from here on sys.argv[3] and sys.argv[4] are read again
    # with different meanings (output file / STRAND), so this looks like a
    # fragment of a different script whose beginning is not visible --
    # confirm against the original sources.
    #    OLIGOLEN = 70
    # Export the command-line values as environment variables; the shell
    # command further down reads its arguments ($file, $len) the same way.
    os.environ["inputf"] = sys.argv[1]
    os.environ["genomef"] = sys.argv[2]
    genomefile = sys.argv[2]
    out = open(sys.argv[3], "w")
    os.environ["STRAND"] = sys.argv[4]

    # Test whether the program can proceed: a <genome>.nhr file that is
    # readable but not writable is taken as a sign that another instance is
    # running, and the script exits.
    if os.access(genomefile + ".nhr", os.R_OK):
        if not os.access(genomefile + ".nhr", os.W_OK):
            print "The same program is running.  the current program can not procede"
            sys.exit()

    # redo the sw_score ,TM part
    inputf = open(sys.argv[1], "r")
    input_dic = fasta.fasta_dic(inputf)

    # Empty result tables; nothing in the visible code fills them yet.
    sw_dic = {}
    gc_dic = {}
    repeat_dic = {}

    for key in input_dic.keys():
        seq = input_dic[key]

        # Hand the sequence to the external program through a scratch file
        # ("~temp" in the working directory) and two environment variables.
        os.environ["len"] = str(len(seq))
        fout = open("~temp", "w")
        fout.write(seq)
        fout.close()
        os.environ["file"] = "~temp"

        # os.popen runs the command through the shell, which expands $file
        # and $len; reading of the pipe `f` is not part of this view.
        f = os.popen("./code/SW $file 0 $len")
Example #5
0
elif __name__ == "__main__":
    print "OLIGO program in progress ... "

    OLIGOLEN = int(sys.argv[6])
    DB = sys.argv[3]

    TRACEFLAG = int(sys.argv[4])  # print to stdio
    STRAND = sys.argv[5]

    inputfile_name = sys.argv[1]
    infile = open(inputfile_name, "r")

    id_seq_dic = {}
    # process input seqfasta file { id:seq,.......}
    id_seq_dic = fasta.fasta_dic(infile)
    ids = id_seq_dic.keys()  # only input ids
    # does this need to mask lower case in the furthur blast
    if sys.argv[7] != "yes":
        for id in ids:
            id_seq_dic[id] = string.upper(id_seq_dic[id])

    # clean up redundant sequence in input file
    seq_id_dic = all_id_seq_dic(id_seq_dic)
    input_dup_list = []
    print "the following sequences are duplicated in the input file:",
    # error log
    ferror = open("error.log", "a")
    for values in seq_id_dic.values():
        if len(values) > 1:
            for id in values[1:]: