Beispiel #1
0
def _convert_sam_to_bam(sam_path, bam_path):
    """Copy every record of the SAM file at sam_path into a new BAM file."""
    insam = pysam.Samfile(sam_path, 'r')
    try:
        outbam = pysam.Samfile(bam_path, 'wb', template=insam)
        try:
            for record in insam:
                outbam.write(record)
        finally:
            # The output must be closed before anyone indexes or re-opens
            # it; otherwise buffered records may not be on disk yet.
            outbam.close()
    finally:
        insam.close()


def load(filepath, db):
    """Register a SAM/BAM file and its reference names in the database.

    A '.sam' input is first converted to a sibling '.bam' file (same path,
    extension swapped). A '<bam path>.bai' index is created when none exists.
    The file's header information is then handed to ``Sam(db).append`` and
    each reference name, passed through ``trim_chromosome_name``, to
    ``Chromosome(db).append``.

    Args:
        filepath: Path to a file whose extension is exactly '.sam' or '.bam'.
        db: Database handle forwarded to ``Sam`` and ``Chromosome``.

    Raises:
        UnsupportedFileError: If the extension is neither '.sam' nor '.bam'.
        AlreadyLoadedError: If ``Sam(db).get_by_filename`` already knows the
            file's base name.
    """
    filename = os.path.basename(filepath)
    base, ext = os.path.splitext(filepath)
    # Exact match: a prefix test would let e.g. '.samx' through and then
    # treat it as BAM further down.
    if ext not in ('.sam', '.bam'):
        raise UnsupportedFileError('ERROR: Not supported file format')

    sam_data = Sam(db)
    chr_data = Chromosome(db)

    if sam_data.get_by_filename(filename) is not None:
        raise AlreadyLoadedError('WARNING: Already loaded "%s"' % filename)

    logging.info("Begin to load '%s'", filename)

    # Convert sam to bam; from here on filepath points at the BAM file.
    # Note that the record is still registered under the original filename.
    if ext == '.sam':
        bam_path = base + '.bam'
        _convert_sam_to_bam(filepath, bam_path)
        filepath = bam_path

    # Create index if not exist
    bai = filepath + '.bai'
    if not os.path.isfile(bai):
        logging.info("Create index '%s'", os.path.basename(bai))
        pysam.index(filepath)

    samfile = pysam.Samfile(filepath)
    try:
        # load sam
        sam_data.append(filename,
                        samfile.header, samfile.lengths,
                        samfile.mapped,
                        samfile.nreferences, samfile.references)

        # load chromosome
        for ref in samfile.references:
            chr_data.append(trim_chromosome_name(ref))
    finally:
        samfile.close()