#!/usr/bin/env python2.7
"""
Example:
	$ python hifive_processing.py alignments.raw name
"""

import os
import sys

import hifive

# Command-line arguments: the raw alignment file and the prefix that every
# output file written by this script will share.
if len(sys.argv) < 3:
    sys.exit('usage: python hifive_processing.py alignments.raw name')
rawAlign, name = sys.argv[1], sys.argv[2]

# Output files, all derived from the common prefix.
fend_path = '%s_fend.hdf5' % name
data_path = '%s_data.hdf5' % name
hic_path = '%s_hic.hdf5' % name

## Load in the restriction enzyme digested fend coordinates
fend = hifive.Fend(fend_path, mode='w')
fend.load_fends('../ce10nm2.bed',
                genome_name='ce10',
                re_name='DpnII',
                format='bed')
fend.save()

## Load in the read data
# The input is one raw text alignment file (see the usage example), so it is
# read with the raw loader. The BAM loader takes a pair of BAM files and
# would index into a bare filename string character by character.
# NOTE(review): confirm against the installed HiFive version.
data = hifive.HiCData(data_path, mode='w')
data.load_data_from_raw(fend_path, [rawAlign], maxinsert=500)
data.save()

## Create a HiC object
hic = hifive.HiC(hic_path, 'w')
hic.load_data(data_path)
hic.save()
# Command-line arguments: two BAM files (loaded together below), the BED file
# handed to Fend.load_fends, and the directory that receives all outputs.
if len(sys.argv) < 5:
    sys.exit('usage: %s bam1 bam2 RE_bed outdir' % sys.argv[0])
bam1, bam2, RE_bed, outdir = sys.argv[1:5]
# e.g. RE_bed = '/resources/HindIII_hg19_liftover.bed'

# makedirs also creates missing parent directories, which os.mkdir would not.
if not os.path.isdir(outdir):
    os.makedirs(outdir)

# Output files, all placed inside outdir.
fend_path = os.path.join(outdir, 'fend_object.hdf5')
data_path = os.path.join(outdir, 'HiC_data_object.hdf5')
project_path = os.path.join(outdir, 'HiC_project_object.hdf5')

# Creating a Fend object
fend = hifive.Fend(fend_path, mode='w')
fend.load_fends(RE_bed, re_name='RE', format='bed')
fend.save()

# Creating a HiCData object
data = hifive.HiCData(data_path, mode='w')
data.load_data_from_bam(fend_path, [bam1, bam2], maxinsert=500)
data.save()

# Creating a HiC Project object
hic = hifive.HiC(project_path, 'w')
hic.load_data(data_path)
hic.save()

# Filtering HiC fends (the saved project is reopened before filtering, as in
# the original workflow)
hic = hifive.HiC(project_path)
hic.filter_fends(mininteractions=1, mindistance=500000, maxdistance=0)
hic.save()
# Example no. 3
# 0
    def run_hifive(self, parameters):
        """Build the HiFive project files for one dataset.

        ``parameters`` is a mapping that must contain the keys
        ``fend_file``, ``bam_file_1``, ``bam_file_2``, ``model`` and
        ``restriction_enzyme``. The last one is a string such as
        ``'[DpnII]'`` or ``'[A,B]'``; surrounding square brackets are
        stripped before it is passed on as ``re_name``.

        For the models ``'Yaffe-Tanay'`` and ``'Hi-Corrector'`` the files
        ``fend_object.hdf5``, ``HiC_data_object.hdf5`` and
        ``HiC_project_object.hdf5`` are written using relative paths. For
        ``'Yaffe-Tanay'`` the project is additionally filtered, its distance
        parameters are estimated and binning corrections are learned, with
        the result saved to ``HiC_norm_binning.hdf5``. Any other model value
        leaves the file system untouched.

        Raises ``KeyError`` if a required key is missing.
        """
        fend_file = parameters['fend_file']
        bam_file_1 = parameters['bam_file_1']
        bam_file_2 = parameters['bam_file_2']
        model = parameters['model']

        # A single join handles one or many enzyme names alike, and avoids
        # calling len() on the result of map(), which fails on Python 3.
        restriction_enzyme = ','.join(
            str(enzyme)
            for enzyme in parameters['restriction_enzyme'].strip('[]').split(','))

        # Both supported models start from the same three objects.
        if model in ('Yaffe-Tanay', 'Hi-Corrector'):
            # Creating a Fend object
            fend = hifive.Fend('fend_object.hdf5', mode='w')
            fend.load_fends(fend_file,
                            re_name=restriction_enzyme,
                            format='bed')
            fend.save()

            # Creating a HiCData object
            data = hifive.HiCData('HiC_data_object.hdf5', mode='w')
            data.load_data_from_bam('fend_object.hdf5',
                                    [bam_file_1, bam_file_2],
                                    maxinsert=500,
                                    skip_duplicate_filtering=False)
            data.save()

            # Creating a HiC Project object
            hic = hifive.HiC('HiC_project_object.hdf5', 'w')
            hic.load_data('HiC_data_object.hdf5')
            hic.save()

        if model == 'Yaffe-Tanay':
            # Each step reopens the saved project and saves it again, so the
            # project file on disk reflects every completed stage.

            # Filtering HiC fends
            hic = hifive.HiC('HiC_project_object.hdf5')
            hic.filter_fends(mininteractions=1, mindistance=0, maxdistance=0)
            hic.save()

            # Finding HiC distance function
            hic = hifive.HiC('HiC_project_object.hdf5')
            hic.find_distance_parameters(numbins=90, minsize=200, maxsize=0)
            hic.save()

            # Learning correction parameters using the binning algorithm
            hic = hifive.HiC('HiC_project_object.hdf5')
            hic.find_binning_fend_corrections(
                max_iterations=1000,
                mindistance=500000,
                maxdistance=0,
                num_bins=[20, 20, 20, 20],
                model=['len', 'distance', 'gc', 'mappability'],
                parameters=['even', 'even', 'even', 'even'],
                usereads='cis',
                learning_threshold=1.0)
            hic.save('HiC_norm_binning.hdf5')
# Example no. 4
# 0
    def run_hifive(self, parameters):
        """Create the HiFive project files and, for Yaffe-Tanay, normalize.

        ``parameters`` is read as a mapping with the keys ``fend_file``,
        ``bam_file_1``, ``bam_file_2``, ``model``, ``add_gc``,
        ``add_mappability`` and ``restriction_enzyme``.

        Files that already exist (``HiC_project_object.hdf5``,
        ``HiC_norm_binning.hdf5``) cause the corresponding stage to be
        skipped.

        NOTE(review): only the visible part of this method is documented
        here; it may continue beyond the last line shown.
        """

        fend_file = parameters['fend_file']
        bam_file_1 = parameters['bam_file_1']
        bam_file_2 = parameters['bam_file_2']
        model = parameters['model']
        # NOTE(review): bool() of any non-empty string is True, including
        # 'False' -- verify what type the caller supplies for these flags.
        add_gc = bool(parameters['add_gc'])
        add_mappability = bool(parameters['add_mappability'])

        # Strip surrounding square brackets and split on commas; the pieces
        # are re-joined with commas to form the re_name value.
        restriction_enzymes = map(
            str, parameters['restriction_enzyme'].strip('[]').split(','))
        if len(restriction_enzymes) == 1:
            restriction_enzyme = restriction_enzymes[0]
        else:
            restriction_enzyme = ','.join(restriction_enzymes)

        # Run for both models; skipped when the project file already exists
        if not os.path.isfile('HiC_project_object.hdf5'):
            # Creating a Fend object
            fend = hifive.Fend('fend_object.hdf5', mode='w')
            fend.load_fends(fend_file,
                            re_name=restriction_enzyme,
                            format='bed')
            fend.save()

            # Creating a HiCData object
            data = hifive.HiCData('HiC_data_object.hdf5', mode='w')
            data.load_data_from_bam('fend_object.hdf5',
                                    [bam_file_1, bam_file_2],
                                    maxinsert=500,
                                    skip_duplicate_filtering=False)
            data.save()

            # Creating a HiC Project object
            hic = hifive.HiC('HiC_project_object.hdf5', 'w')
            hic.load_data('HiC_data_object.hdf5')
            hic.save()

        if model == 'Yaffe-Tanay':
            # Skip the whole normalization when its output file already exists
            if not os.path.isfile('HiC_norm_binning.hdf5'):
                # Filtering HiC fends
                hic = hifive.HiC('HiC_project_object.hdf5')
                hic.filter_fends(mininteractions=1,
                                 mindistance=0,
                                 maxdistance=0)

                # Finding HiC distance function
                hic.find_distance_parameters(numbins=90,
                                             minsize=200,
                                             maxsize=0)
                # Saved under a separate name, so the base project file is
                # not overwritten by this step.
                hic.save('HiC_project_object_with_distance_parameters.hdf5')

                # Learning correction parameters using the binning algorithm.
                # 'len' and 'distance' are always in the model; 'gc' and
                # 'mappability' are appended only when their flags are set.
                my_model = ['len', 'distance']
                if add_gc == True:
                    my_model.append('gc')
                if add_mappability == True:
                    my_model.append('mappability')
                # One num_bins entry (20) and one 'even' entry per model term
                my_num_bins = [20] * len(my_model)
                my_parameters = ['even'] * len(my_model)
                hic.find_binning_fend_corrections(max_iterations=1000,
                                                  mindistance=500000,
                                                  maxdistance=0,
                                                  num_bins=my_num_bins,
                                                  model=my_model,
                                                  parameters=my_parameters,
                                                  usereads='cis',
                                                  learning_threshold=1.0)
                hic.save('HiC_norm_binning.hdf5')