#!/usr/bin/env python2.7
"""Create the HiFive fend, data and project files for one set of alignments.

Example:
    $ python hifive_processing.py alignments.raw name
"""

import hifive
import sys

# First argument: the alignment file.  Second argument: the prefix that every
# output file name starts with.
rawAlign = sys.argv[1]
name = sys.argv[2]

# The three HDF5 outputs, all named after the prefix.
fend_path = '%s_fend.hdf5' % (name)
data_path = '%s_data.hdf5' % (name)
hic_path = '%s_hic.hdf5' % (name)

## Load in the restriction enzyme digested fend coordinates
fend = hifive.Fend(fend_path, mode='w')
fend.load_fends(
    '../ce10nm2.bed',
    genome_name='ce10',
    re_name='DpnII',
    format='bed',
)
fend.save()

## Load in the read data
# NOTE(review): the usage example names a '.raw' file, yet the reads are
# loaded with load_data_from_bam -- confirm which input format is intended.
data = hifive.HiCData(data_path, mode='w')
data.load_data_from_bam(fend_path, rawAlign, maxinsert=500)
data.save()

## Create a HiC object
hic = hifive.HiC(hic_path, 'w')
hic.load_data(data_path)
hic.save()
# Command-line arguments, in order: the two BAM files (handed to HiFive
# together as one pair), the restriction-fragment BED file, and the directory
# that receives every output file.
bam1 = sys.argv[1]
bam2 = sys.argv[2]
RE_bed = sys.argv[3]
outdir = sys.argv[4]
# Example BED path: RE_bed='/resources/HindIII_hg19_liftover.bed'

# makedirs rather than mkdir, so an output path whose parent directories do
# not exist yet is created as well instead of raising OSError.
if not os.path.exists(outdir):
    os.makedirs(outdir)

# Output files, built once so each step refers to the same paths.
fend_path = os.path.join(outdir, 'fend_object.hdf5')
data_path = os.path.join(outdir, 'HiC_data_object.hdf5')
project_path = os.path.join(outdir, 'HiC_project_object.hdf5')

# Creating a Fend object
fend = hifive.Fend(fend_path, mode='w')
fend.load_fends(RE_bed, re_name='RE', format='bed')
fend.save()

# Creating a HiCData object
data = hifive.HiCData(data_path, mode='w')
data.load_data_from_bam(fend_path, [bam1, bam2], maxinsert=500)
data.save()

# Creating a HiC Project object
hic = hifive.HiC(project_path, 'w')
hic.load_data(data_path)
hic.save()

# Filtering HiC fends (the saved project is reopened first, as in the
# original workflow).
hic = hifive.HiC(project_path)
hic.filter_fends(mininteractions=1, mindistance=500000, maxdistance=0)
hic.save()
def run_hifive(self, parameters):
    """Build a HiFive Hi-C project and, for 'Yaffe-Tanay', normalize it.

    ``parameters`` is a mapping that must provide the keys 'fend_file',
    'bam_file_1', 'bam_file_2', 'model' and 'restriction_enzyme'; a missing
    key raises KeyError.  The enzyme value may be a single name or a
    bracketed, comma-separated list such as '[A,B]': the surrounding brackets
    are stripped and the names are handed to HiFive as one comma-joined
    string.

    For the models 'Yaffe-Tanay' and 'Hi-Corrector' the files
    'fend_object.hdf5', 'HiC_data_object.hdf5' and 'HiC_project_object.hdf5'
    are written using these relative names.  For 'Yaffe-Tanay' the project is
    additionally filtered, its distance parameters are found, and the binning
    corrections are learned and saved to 'HiC_norm_binning.hdf5'.  Any other
    model value does nothing.

    Returns None.
    """
    fend_file = parameters['fend_file']
    bam_file_1 = parameters['bam_file_1']
    bam_file_2 = parameters['bam_file_2']
    model = parameters['model']

    # Joining covers one name and many names alike.  A list comprehension is
    # used instead of len(map(...)), which fails on Python 3 where map()
    # returns an iterator.
    enzyme_names = [
        str(enzyme)
        for enzyme in parameters['restriction_enzyme'].strip('[]').split(',')
    ]
    restriction_enzyme = ','.join(enzyme_names)

    # Both supported models start from the same fend/data/project files.
    if model in ('Yaffe-Tanay', 'Hi-Corrector'):
        # Creating a Fend object
        fend = hifive.Fend('fend_object.hdf5', mode='w')
        fend.load_fends(fend_file, re_name=restriction_enzyme, format='bed')
        fend.save()

        # Creating a HiCData object
        data = hifive.HiCData('HiC_data_object.hdf5', mode='w')
        data.load_data_from_bam('fend_object.hdf5',
                                [bam_file_1, bam_file_2],
                                maxinsert=500,
                                skip_duplicate_filtering=False)
        data.save()

        # Creating a HiC Project object
        hic = hifive.HiC('HiC_project_object.hdf5', 'w')
        hic.load_data('HiC_data_object.hdf5')
        hic.save()

    if model == 'Yaffe-Tanay':
        # Each step below reopens the saved project before working on it,
        # exactly as the original sequence did.

        # Filtering HiC fends
        hic = hifive.HiC('HiC_project_object.hdf5')
        hic.filter_fends(mininteractions=1, mindistance=0, maxdistance=0)
        hic.save()

        # Finding HiC distance function
        hic = hifive.HiC('HiC_project_object.hdf5')
        hic.find_distance_parameters(numbins=90, minsize=200, maxsize=0)
        hic.save()

        # Learning correction parameters using the binning algorithm
        hic = hifive.HiC('HiC_project_object.hdf5')
        hic.find_binning_fend_corrections(
            max_iterations=1000,
            mindistance=500000,
            maxdistance=0,
            num_bins=[20, 20, 20, 20],
            model=['len', 'distance', 'gc', 'mappability'],
            parameters=['even', 'even', 'even', 'even'],
            usereads='cis',
            learning_threshold=1.0)
        # The normalized project goes to its own file.
        hic.save('HiC_norm_binning.hdf5')
def run_hifive(self, parameters):
    """Build a HiFive Hi-C project and, for 'Yaffe-Tanay', normalize it.

    ``parameters`` is a mapping that must provide the keys 'fend_file',
    'bam_file_1', 'bam_file_2', 'model', 'add_gc', 'add_mappability' and
    'restriction_enzyme'; a missing key raises KeyError.  The enzyme value
    may be a single name or a bracketed, comma-separated list such as
    '[A,B]': the surrounding brackets are stripped and the names are handed
    to HiFive as one comma-joined string.

    Work is skipped when its output already exists: the fend, data and
    project files are only created when 'HiC_project_object.hdf5' is absent,
    and the 'Yaffe-Tanay' normalization only runs when
    'HiC_norm_binning.hdf5' is absent.  All file names are relative.  A model
    other than 'Yaffe-Tanay' gets the project files only.

    Returns None.
    """
    fend_file = parameters['fend_file']
    bam_file_1 = parameters['bam_file_1']
    bam_file_2 = parameters['bam_file_2']
    model = parameters['model']
    # NOTE(review): bool() of any non-empty string is True, including
    # 'False' -- confirm that callers pass real booleans or 0/1 here.
    add_gc = bool(parameters['add_gc'])
    add_mappability = bool(parameters['add_mappability'])

    # Joining covers one name and many names alike.  A list comprehension is
    # used instead of len(map(...)), which fails on Python 3 where map()
    # returns an iterator.
    enzyme_names = [
        str(enzyme)
        for enzyme in parameters['restriction_enzyme'].strip('[]').split(',')
    ]
    restriction_enzyme = ','.join(enzyme_names)

    # Run for both models
    if not os.path.isfile('HiC_project_object.hdf5'):
        # Creating a Fend object
        fend = hifive.Fend('fend_object.hdf5', mode='w')
        fend.load_fends(fend_file, re_name=restriction_enzyme, format='bed')
        fend.save()

        # Creating a HiCData object
        data = hifive.HiCData('HiC_data_object.hdf5', mode='w')
        data.load_data_from_bam('fend_object.hdf5',
                                [bam_file_1, bam_file_2],
                                maxinsert=500,
                                skip_duplicate_filtering=False)
        data.save()

        # Creating a HiC Project object
        hic = hifive.HiC('HiC_project_object.hdf5', 'w')
        hic.load_data('HiC_data_object.hdf5')
        hic.save()

    if model == 'Yaffe-Tanay':
        if not os.path.isfile('HiC_norm_binning.hdf5'):
            # Filtering HiC fends
            hic = hifive.HiC('HiC_project_object.hdf5')
            hic.filter_fends(mininteractions=1, mindistance=0, maxdistance=0)

            # Finding HiC distance function; the result is saved under a
            # separate name, so the base project file is not overwritten.
            hic.find_distance_parameters(numbins=90, minsize=200, maxsize=0)
            hic.save('HiC_project_object_with_distance_parameters.hdf5')

            # Learning correction parameters using the binning algorithm.
            # 'len' and 'distance' are always modelled; the other two terms
            # are opt-in through the add_gc / add_mappability flags.
            my_model = ['len', 'distance']
            if add_gc:
                my_model.append('gc')
            if add_mappability:
                my_model.append('mappability')
            # One bin count and one binning scheme per model term.
            my_num_bins = [20] * len(my_model)
            my_parameters = ['even'] * len(my_model)
            hic.find_binning_fend_corrections(max_iterations=1000,
                                              mindistance=500000,
                                              maxdistance=0,
                                              num_bins=my_num_bins,
                                              model=my_model,
                                              parameters=my_parameters,
                                              usereads='cis',
                                              learning_threshold=1.0)
            hic.save('HiC_norm_binning.hdf5')