def main(readlib, cmapfile, matesfile, out_dir):
    """Parse mate fragments and write them to per-chromosome output files.

    Start and finish timestamps are appended to ``parse_mates.LOG`` inside
    ``out_dir``. The finish line is only written when parsing completes
    without an exception.

    Args:
        readlib: Read-library identifier; forwarded to ``generate_fhs`` and
            ``FragmentParser.parse_mates`` and echoed in the log.
        cmapfile: Forwarded to ``FragmentParser`` as its ``CHRMAP`` argument.
        matesfile: Mates input forwarded to ``FragmentParser.parse_mates``.
        out_dir: Directory that receives the log file; also forwarded to
            ``generate_fhs`` for the per-chromosome outputs.
    """
    log_path = os.path.join(out_dir, 'parse_mates.LOG')
    # The context manager guarantees the log is closed even if parsing fails.
    with open(log_path, 'a') as LOGFH:
        LOGFH.write('Start the program to parse [%s] fragments for [%s] ...  %s\n' % (readlib, matesfile, str(datetime.datetime.now())))
        # Flush now so the start line is on disk before the long-running parse.
        LOGFH.flush()
        # Initiate the FragmentParser
        matesparser = FragmentParser(CHRMAP=cmapfile)
        # Parse and write to the output files organized by chromosome
        chrs = matesparser.chr_mapping.values()
        chrs_fh = generate_fhs(chrs, readlib, out_dir)
        try:
            # `chrom` instead of `chr` to avoid shadowing the builtin.
            for chrom, fragment in matesparser.parse_mates(matesfile, readlib):
                fout = chrs_fh[chrom]
                record = fragment.get_bedrecord(chrom)
                fout.write(record + '\n')
        finally:
            # Always release the per-chromosome handles, including on error.
            close_fhs(chrs_fh)
        LOGFH.write('Finish the program ...  %s\n\n' % str(datetime.datetime.now()))
Example #2
0
def main(readlib, cmapfile, matesfile, out_dir):
    """Parse mate fragments and write them to per-chromosome output files.

    Start and finish timestamps are appended to ``parse_mates.LOG`` inside
    ``out_dir``. The finish line is only written when parsing completes
    without an exception.

    Args:
        readlib: Read-library identifier; forwarded to ``generate_fhs`` and
            ``FragmentParser.parse_mates`` and echoed in the log.
        cmapfile: Forwarded to ``FragmentParser`` as its ``CHRMAP`` argument.
        matesfile: Mates input forwarded to ``FragmentParser.parse_mates``.
        out_dir: Directory that receives the log file; also forwarded to
            ``generate_fhs`` for the per-chromosome outputs.
    """
    # The context manager guarantees the log is closed even if parsing fails.
    with open(os.path.join(out_dir, 'parse_mates.LOG'), 'a') as LOGFH:
        LOGFH.write(
            'Start the program to parse [%s] fragments for [%s] ...  %s\n' %
            (readlib, matesfile, str(datetime.datetime.now())))
        # Flush now so the start line is on disk before the long-running parse.
        LOGFH.flush()
        # Initiate the FragmentParser
        matesparser = FragmentParser(CHRMAP=cmapfile)
        # Parse and write to the output files organized by chromosome
        chrs = matesparser.chr_mapping.values()
        chrs_fh = generate_fhs(chrs, readlib, out_dir)
        try:
            # `chrom` instead of `chr` to avoid shadowing the builtin.
            for chrom, fragment in matesparser.parse_mates(matesfile, readlib):
                fout = chrs_fh[chrom]
                record = fragment.get_bedrecord(chrom)
                fout.write(record + '\n')
        finally:
            # Always release the per-chromosome handles, including on error.
            close_fhs(chrs_fh)
        LOGFH.write('Finish the program ...  %s\n\n' %
                    str(datetime.datetime.now()))
Example #3
0
 LOGFH.flush()
 paras = parse_paras(parafile)
 # 2. Build CpGs
 LOGFH.write('... Build CpG sites ...  %s\n' % str(datetime.datetime.now()))
 siteparser = SiteParser(chr)
 siteparser.parse_sites(cpg_file, 'CpGFull')
 cpg_sites = siteparser.get_sites()
 # 3. Build RE sites first
 LOGFH.write('... Build RE sites ...  %s\n' % str(datetime.datetime.now()))
 siteparser = SiteParser(chr)
 siteparser.parse_sites(re_sitefile, 'RE')
 re_sites = siteparser.get_sites()
 del siteparser
 # 4. Build RE fragments
 LOGFH.write('... Build RE fragments ...  %s\n' % str(datetime.datetime.now()))
 fragparser = FragmentParser()
 # Split fragfile into subsets to avoid memory overflow
 re_linenum = count_file_lines(re_fragfile)
 refh = open(re_fragfile)
 re_fraglines = []
 pre_count = 0
 for count, line in enumerate(refh):
     re_fraglines.append(line)        
     if (count % 10000 == 0 and count != 0) or count + 1 == re_linenum:
         re_frags = [fragment for fragment in fragparser.parse_bedfrags(re_fraglines, 'RE')]
         # 5. Filter RE fragments, cpg_sites are updated
         LOGFH.write('...... Filter RE fragments from lines [%d, %d] ......\n' % (pre_count, count))
         refilter = REFilter(re_frags, cpg_sites, re_sites, paras['outlen'], paras['inlen'])
         refilter.scan()
         # Save information for the passed and failed RE fragments
         write_logfiles(out_dir, re_frags, paras, 're')