Esempio n. 1
0
def get_gc(some_gbk):
    """
    Write windowed GC content of every contig in a GenBank file for a Circos
    line plot.

    Each record is cut into consecutive, non-overlapping 1000 bp windows
    starting at position 0. A trailing partial window is kept only when more
    than half of it (at least 501 bp) lies inside the record; its reported
    end coordinate is still ``start + 999``, so it may exceed the record
    length.

    The output file is ``circos/GC/gc_<name>.txt`` where ``<name>`` is the
    basename of ``some_gbk`` with its last 13 characters removed. It holds
    three ``#`` header lines (min, max, average and standard deviation of the
    window values) followed by one line per window::

        <strain><contig> <start> <end> <gc>

    Args:
        some_gbk: Path to the GenBank file to read.

    Returns:
        Tuple ``(min, max, average, std)`` of the per-window GC values.

    Raises:
        ValueError: If no window could be computed (the file has no records,
            or every record is shorter than 501 bp).
    """
    window = 1000
    half_window = window // 2

    gc_points = []
    with open(some_gbk, 'r') as file_handle:
        for record in SeqIO.parse(file_handle, 'gb'):
            # A window is emitted while its midpoint still falls inside the
            # record, i.e. start + half_window < len(record).
            for start in range(0, len(record) - half_window, window):
                end = start + window - 1
                # Slice end is exclusive, so end + 1 is needed for the GC
                # value to cover the same inclusive interval that is written
                # to the file.
                # NOTE(review): newer Biopython releases replace SeqUtils.GC
                # with SeqUtils.gc_fraction -- confirm the pinned version.
                gc_cont = SeqUtils.GC(record.seq[start:end + 1])
                gc_points.append((record.name, start, end, gc_cont))

    if not gc_points:
        # Fail before touching the output file instead of crashing later on
        # min() of an empty list.
        raise ValueError(
            'No GC windows could be computed from {}'.format(some_gbk)
        )

    # The name depends only on the file path, so it is parsed once.
    # Index 2 is used as the strain prefix of every contig ID.
    sp_strain = kv.parse_genbank_name(some_gbk)[2]

    out_dir = 'circos/GC/'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # NOTE(review): [:-13] presumably strips a fixed 13-character filename
    # suffix -- confirm against the naming scheme of the input files.
    out_path = 'circos/GC/gc_{}.txt'.format(os.path.basename(some_gbk)[:-13])

    values = [point[3] for point in gc_points]
    stats = (min(values), max(values), np.average(values), np.std(values))

    with open(out_path, 'w') as out_handle:
        out_handle.write(
            "# Min: {}\n# Max: {}\n# Avg, Std: {}, {}\n".format(*stats)
        )
        for point in gc_points:
            out_handle.write(
                '{0}{1} {2} {3} {4}\n'.format(sp_strain, *point)
            )
    return stats
Esempio n. 2
0
def get_karyotype(some_gbk):
    """
    Convert Genbank file into Karyotype file for Circos
    - Each contig is a "chromosome"
    - format: 'chr - ID LABEL START END COLOR'

    Only contigs longer than 1000 bp are written, whatever order they appear
    in. All contigs of one file share a single randomly drawn RGB color.

    The output file is ``circos/karyotypes/karyotype_<name>.txt`` where
    ``<name>`` is the basename of ``some_gbk`` with its last 13 characters
    removed; the directory is created when missing.

    Args:
        some_gbk: Path to the GenBank file to read.

    Returns:
        None. The result is the file written to disk.

    Raises:
        ValueError: If the GenBank file contains no records.
    """
    with open(some_gbk, 'r') as file_handle:
        contigs = [
            (record.name, len(record))
            for record in SeqIO.parse(file_handle, 'gb')
        ]

    if not contigs:
        raise ValueError('No records found in {}'.format(some_gbk))

    # The name depends only on the file path, so it is parsed once.
    # Index 2 is used as the strain prefix of every contig ID.
    sp_strain = kv.parse_genbank_name(some_gbk)[2]

    out_dir = 'circos/karyotypes/'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # NOTE(review): [:-13] presumably strips a fixed 13-character filename
    # suffix -- confirm against the naming scheme of the input files.
    out_path = 'circos/karyotypes/karyotype_{}.txt'.format(
        os.path.basename(some_gbk)[:-13]
    )

    # randint's upper bound is exclusive, so each channel is in 0..254.
    color = [np.random.randint(0, 255) for _ in range(3)]

    with open(out_path, 'w') as karyotype:
        for name, length in contigs:
            if length <= 1000:
                # Skip rather than stop: records are not guaranteed to be
                # sorted by decreasing length, and stopping would drop any
                # long contig that follows a short one.
                continue
            karyotype.write(
                'chr - {0}{1} {2} {3} {4} {5},{6},{7}\n'.format(
                    sp_strain,
                    name,
                    name,
                    '1',
                    length,
                    *color
                )
            )