Ejemplo n.º 1
0
def RefFa(file_in):
    """Load a FASTA file into a dict keyed by a shortened record name.

    The key is built from the first two underscore-separated fields of
    each record name, re-joined with ``_``; the value is the record's
    sequence converted to ``str``.  When several records shorten to the
    same key, the last one read overwrites the earlier ones.
    """
    return {
        '_'.join(re.split('_', record.name)[:2]): str(record.seq)
        for record in FastaReader(file_in)
    }
Ejemplo n.º 2
0
def RefFa(file_in):
    """Load a FASTA file whose record names look like ``<label>__<field>``.

    Returns a 2-tuple:
      * the sorted, de-duplicated list of the second ``__``-separated
        field of every record name, and
      * a dict mapping the first field to the record's sequence string
        (the last record wins when a first field repeats).

    A record name without ``__`` raises ``IndexError``.
    """
    seqs_by_label = {}
    second_fields = set()
    for record in FastaReader(file_in):
        fields = re.split('__', record.name)
        second_fields.add(fields[1])
        seqs_by_label[fields[0]] = str(record.seq)
    return sorted(second_fields), seqs_by_label
def get_variants(chr, pos, vartype, fasta_chr_file):
    """Build the list of variants of one type at a reference position.

    Args:
        chr: Chromosome identifier; passed through unchanged to the
            variant helper functions.
        pos: Position on the reference.  ``pos - 1`` is used as the slice
            start, so this is presumably 1-based -- confirm with callers.
        vartype: ``'SNV'``, ``'DEL'`` or ``'INS'``.  Any other value
            produces an empty list.
        fasta_chr_file: Path to a FASTA file; only its first record is
            used as the reference.  Presumably a single-chromosome file
            (an earlier revision hard-coded a ``chr<N>.fa`` path) --
            confirm with callers.

    Returns:
        A list holding whatever the matching helper returned.

    Raises:
        StopIteration: if the FASTA file contains no records.
    """
    # next(iter(...)) works on Python 2.6+ and Python 3; the previous
    # ``.__iter__().next()`` only exists on Python 2 iterators.
    ref_seq = next(iter(FastaReader(fasta_chr_file)))
    # Upper-cased window of (up to) four bases starting at ``pos``.
    ref = ref_seq[pos - 1:pos + 3].seq.upper()

    variant_list = []
    if vartype == 'SNV':
        variant_list.extend(get_snps(chr, pos, ref))
    # NOTE(review): 'DEL' dispatches to get_insertions and 'INS' to
    # get_deletions.  This looks swapped, but the helpers are not visible
    # here, so the original mapping is preserved -- confirm intent.
    elif vartype == 'DEL':
        variant_list.extend(get_insertions(chr, pos, ref))
    elif vartype == 'INS':
        variant_list.extend(get_deletions(chr, pos, ref))
    return variant_list
Ejemplo n.º 4
0
def HandleData(file_in, list_all, outfile):
    """Group FASTA records by identical sequence and write one table per group.

    Args:
        file_in: Path to a FASTA file.  Its base name, minus a trailing
            ``.fa``, is passed to ``MakeAllSample`` as the cluster name.
        list_all: Passed through unchanged to ``MakeAllSample``.
        outfile: Writable text file-like object that receives the rows.

    For every distinct sequence, the names of all records carrying it are
    handed to ``MakeAllSample``; each row of the returned table is written
    as ``<index>\\t<tab-joined values>\\n``.
    """
    # str.strip('.fa') removes ANY of the characters '.', 'f', 'a' from
    # both ends (e.g. 'alpha.fa' -> 'lph'); drop only the real suffix.
    Cluster = os.path.basename(file_in)
    if Cluster.endswith('.fa'):
        Cluster = Cluster[:-len('.fa')]

    # sequence string -> names of every record with that exact sequence
    dict_fa = {}
    for item in FastaReader(file_in):
        dict_fa.setdefault(str(item.seq), []).append(item.name)

    for names in dict_fa.values():
        pd_out = MakeAllSample(names, list_all, Cluster)
        for index in pd_out.index:
            row = '\t'.join([str(i) for i in pd_out.loc[index, :]])
            outfile.write(index + '\t' + row + '\n')
Ejemplo n.º 5
0
def HandleData(file_in, pd_in):
    """Append one ``MakeAllSample`` table per distinct sequence to ``pd_in``.

    Args:
        file_in: Path to a FASTA file.  Its base name, minus a trailing
            ``.fa``, is passed to ``MakeAllSample`` as the cluster name.
        pd_in: pandas DataFrame to extend; its column labels are passed
            to ``MakeAllSample`` as the sample list.

    Returns:
        ``pd_in`` followed by the table produced for each group of
        records sharing an identical sequence.  ``pd_in`` itself is
        returned unchanged when the FASTA file has no records.
    """
    # Local import keeps this block self-contained; pandas is already a
    # dependency because pd_in is a DataFrame.
    import pandas as pd

    list_all = list(pd_in.columns)

    # str.strip('.fa') removes ANY of the characters '.', 'f', 'a' from
    # both ends (e.g. 'alpha.fa' -> 'lph'); drop only the real suffix.
    Cluster = os.path.basename(file_in)
    if Cluster.endswith('.fa'):
        Cluster = Cluster[:-len('.fa')]

    # sequence string -> names of every record with that exact sequence
    dict_fa = {}
    for item in FastaReader(file_in):
        dict_fa.setdefault(str(item.seq), []).append(item.name)

    frames = [
        MakeAllSample(names, list_all, Cluster)
        for names in dict_fa.values()
    ]
    if not frames:
        return pd_in
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every iteration; a single concat gives the same rows.
    return pd.concat([pd_in] + frames)
Ejemplo n.º 6
0
def HandleData(dir_in):
    """Compute each sequence's mismatch rate against the consensus record.

    The consensus name is taken from the first line of
    ``<dir_in>/input.fa`` (surrounding whitespace and leading ``>``
    removed).  Every record of ``<dir_in>/output.fa`` is then compared
    position by position with the consensus record of that same file.

    Args:
        dir_in: Directory containing ``input.fa`` and ``output.fa``.

    Returns:
        A 2-tuple ``(dict_burdon, Consus)``.  ``dict_burdon`` maps the
        part of each record name captured by ``(\\w+_\\w+)GM`` to the
        number of mismatches per 1000 consensus positions, rounded to
        four decimals and formatted as a string.  ``Consus`` is the
        consensus record name.

    Raises:
        ValueError: if ``input.fa`` is empty.
        KeyError: if the consensus name is not a record of ``output.fa``.
        IndexError: if a record is shorter than the consensus, or its
            name does not match the label pattern.
        ZeroDivisionError: if the consensus sequence is empty.
    """
    Input = os.path.join(dir_in, 'input.fa')
    Output = os.path.join(dir_in, 'output.fa')

    with open(Input, 'r') as f:
        first_line = f.readline()
    if not first_line:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError('%s is empty; cannot read the consensus name' % Input)
    Consus = first_line.strip().lstrip('>')

    dict_fa = {}
    for item in FastaReader(Output):
        dict_fa[item.name] = str(item.seq)

    consensus_seq = dict_fa[Consus]
    total = len(consensus_seq)
    # Raw string: '\w' in a normal literal is an invalid escape sequence.
    label_re = re.compile(r'(\w+_\w+)GM')

    dict_burdon = {}
    for key, seq in dict_fa.items():
        # Indexing (not zip) so a record shorter than the consensus still
        # fails loudly instead of being silently under-counted.
        m = sum(1 for i in range(total) if seq[i] != consensus_seq[i])
        lb = label_re.findall(key)[0]
        # 1000.0 forces true division on Python 2 as well; integer
        # division there truncated the rate before it was rounded.
        dict_burdon[lb] = str(round(m * 1000.0 / total, 4))
    return dict_burdon, Consus
Ejemplo n.º 7
0
def ReadFasta(file_in):
    """Read a FASTA file into a ``{record name: sequence string}`` dict.

    When two records share a name, the later one replaces the earlier.
    """
    return {record.name: str(record.seq) for record in FastaReader(file_in)}