def load(filepath, db, action=None):
    """Run the pileup for an alignment file that is already registered.

    The alignment file is opened through ``_load_sam``, its database record
    is looked up by base filename, and every reference sequence is resolved
    to a chromosome record before the work is handed to ``cypileup.pileup``.

    Args:
        filepath: Path to the alignment file.
        db: Database handle passed to ``_load_sam``, ``Sam``, ``Chromosome``
            and ``cypileup.pileup``.
        action: Optional object forwarded unchanged to ``cypileup.pileup``.

    Returns:
        None. If the file has no record in the database, an error is logged
        and nothing else is done.
    """
    samfile = _load_sam(filepath, db)
    try:
        filename = os.path.basename(filepath)
        sam = Sam(db).get_by_filename(filename)
        if sam is None:
            # Checked before the chromosome lookup so that an unregistered
            # file yields this message instead of failing further down, and
            # returned from so that ``sam['id']`` is never evaluated on None.
            logging.error('Error : please load "%s" first' % filename)
            return
        sam_id = sam['id']

        chromosome_data = Chromosome(db)
        chromosomes = []
        for ref in samfile.references:
            chromosome = chromosome_data.get_by_name(trim_chromosome_name(ref))
            # Keep the untrimmed reference name alongside the record.
            chromosome['ref'] = ref
            chromosomes.append(chromosome)

        # cypileup
        cypileup.pileup(samfile, chromosomes, sam_id, db, action)
    finally:
        # Release the alignment file on every exit path, including errors.
        samfile.close()
def load(filepath, db):
    """Register a SAM/BAM file and its reference sequences in the database.

    A ``.sam`` input is first converted to a BAM file next to it (same path
    with the extension replaced by ``.bam``). A ``.bai`` index is created if
    none exists, then the file's header information is appended to the
    ``Sam`` table and each trimmed reference name to the ``Chromosome`` table.

    Args:
        filepath: Path to the ``.sam`` or ``.bam`` file.
        db: Database handle passed to ``Sam`` and ``Chromosome``.

    Raises:
        UnsupportedFileError: The extension does not start with ``.sam`` or
            ``.bam``.
        AlreadyLoadedError: A record with the same base filename exists.
    """
    filename = os.path.basename(filepath)
    base, ext = os.path.splitext(filepath)
    if not re.match(r'^\.(sam|bam)', ext):
        raise UnsupportedFileError('ERROR: Not supported file format')

    sam_data = Sam(db)
    chr_data = Chromosome(db)

    if sam_data.get_by_filename(filename) is not None:
        raise AlreadyLoadedError('WARNING: Already loaded "%s"' % filename)

    logging.info("Begin to load '%s'" % filename)

    # Convert sam to bam
    if ext == '.sam':
        bam_path = base + '.bam'
        insam = pysam.Samfile(filepath, 'r')
        try:
            outbam = pysam.Samfile(bam_path, 'wb', template=insam)
            try:
                for record in insam:
                    outbam.write(record)
            finally:
                # The BAM must be closed (flushed and finalised) before it
                # is indexed and reopened below.
                outbam.close()
        finally:
            insam.close()
        filepath = bam_path

    # Create index if not exist
    bai = filepath + '.bai'
    if not os.path.isfile(bai):
        logging.info("Create index '%s'" % os.path.basename(bai))
        pysam.index(filepath)

    # load sam
    samfile = pysam.Samfile(filepath)
    try:
        # The record keeps the name of the file as given by the caller, even
        # when a converted BAM is what is actually read here.
        sam_data.append(filename,
                        samfile.header,
                        samfile.lengths,
                        samfile.mapped,
                        samfile.nreferences,
                        samfile.references)

        # load chromosome
        for ref in samfile.references:
            chr_data.append(trim_chromosome_name(ref))
    finally:
        samfile.close()
def load(filepath, db, action=None):
    """Load a BED file and all of its fragments into the database.

    A ``Bed`` record is appended for the file, then every line yielded by
    the reader is stored as a ``BedFragment`` linked to that record and to a
    chromosome record (created on demand from the normalised ``chrom``
    value).

    Args:
        filepath: Path to a file whose name ends in ``.bed``.
        db: Database handle passed to ``Bed``, ``Chromosome`` and
            ``BedFragment``.
        action: Optional zero-argument callable. For files of at least 100
            lines it is called roughly once per percent of progress; if the
            object it returns is a ``BedLoaderAction``, that object is
            called with the integer percentage loaded so far.

    Raises:
        UnsupportedFileError: The filename does not end in ``.bed``.
        AlreadyLoadedError: A record with the same base filename exists.
    """
    filename = os.path.basename(filepath)
    file_ext = filename.split('.')[-1]
    if file_ext != 'bed':
        raise UnsupportedFileError('ERROR: Not supported file format')

    bed_data = Bed(db)
    chr_data = Chromosome(db)
    bed_fragment_data = BedFragment(db)

    if bed_data.get_by_filename(filename) is not None:
        raise AlreadyLoadedError('WARNING: Already loaded "%s"' % filename)

    logging.info("Begin to load '%s'" % filename)

    count = 0
    # The context manager guarantees the input file is closed, even when a
    # malformed line raises part-way through the load.
    with open(filepath, 'r') as handle:
        # load bed
        bedfile = pybed.BedReader(handle)
        bed_data.append(filename, "", "", 0, 0)
        bed = bed_data.get_by_filename(filename)

        # load bed fragments
        for line in bedfile.yield_lines():
            b = bedfile.get_line(line)

            # Normalise the chromosome name: drop 'Chr'/'chr' and dots.
            c_name = b['chrom']
            c_name = c_name.replace('Chr', '')
            c_name = c_name.replace('chr', '')
            c_name = c_name.replace('.', '')

            c = chr_data.get_by_name(c_name)
            if c is None:
                chr_data.append(c_name)
                c = chr_data.get_by_name(c_name)

            # Per-item colour is used only when the record's 'rgb' field is
            # 0 and the item carries exactly three components.
            ir, ig, ib = 0, 0, 0
            if bed['rgb'] == 0:
                rgb = b['itemRgb'].split(',')
                if len(rgb) == 3:
                    ir, ig, ib = map(long, rgb)

            # Strand encoding: '+' -> 0, '-' -> 1, anything else -> -1.
            if b['strand'] == '+':
                strand = 0
            elif b['strand'] == '-':
                strand = 1
            else:
                strand = -1

            bed_fragment_data.append(
                bed['id'], c['id'],
                long(b['chromStart']), long(b['chromEnd']),
                b['name'], long(b['score']), strand,
                long(b['thickStart']), long(b['thickEnd']),
                ir, ig, ib,
                long(b['blockCount']), b['blockSizes'], b['blockStarts'])

            count += 1

            total = bedfile.length
            # '//' keeps the step an integer regardless of interpreter
            # version; files shorter than 100 lines report no progress.
            if total >= 100 and count % (total // 100) == 0:
                if action is not None:
                    act = action()
                    if isinstance(act, BedLoaderAction):
                        # 'count' already includes the fragment just stored,
                        # so it is used directly; adding one more would
                        # report 101 for a 100-line file.
                        act(count * 100 // total)

    logging.debug('Loaded %d fragments' % count)