コード例 #1
0
 def __init__(self, ref_fasta_file, allc_file=None, output_file=None):
     """
     Set up the reference genome and optionally process an allc file.

     ref_fasta_file is handed to genome.GenomeClass and the result is
     stored as self.fasta. If allc_file is given, it is passed to
     self.load_allc_file. If output_file is given, it is then passed to
     self.write_h5_file. Either step is skipped when its argument is None.
     """
     self.fasta = genome.GenomeClass(ref_fasta_file)
     load_requested = allc_file is not None
     write_requested = output_file is not None
     # Loading always comes before writing when both are requested.
     if load_requested:
         self.load_allc_file(allc_file)
     if write_requested:
         self.write_h5_file(output_file)
コード例 #2
0
 def __init__(self,
              file_paths,
              file_ids=None,
              genome="at_tair10",
              load_all=True):
     """
     Register a collection of files and, optionally, load them right away.

     file_paths: sequence of file paths; its length is stored as num_lines.
     file_ids: identifiers for the files; when None, the base name of each
         path is used instead (as a numpy array).
     genome: value passed to g.GenomeClass to build self.genome.
     load_all: when true, self.load_meths_files(file_paths) is called and
         its result is stored as self.meths_list; otherwise that attribute
         is not set here.
     """
     self.genome = g.GenomeClass(genome)
     self._file_path = file_paths
     self.num_lines = len(file_paths)
     if load_all:
         self.meths_list = self.load_meths_files(file_paths)
     if file_ids is None:
         # Fall back to the file names themselves as identifiers.
         file_ids = np.array([op.basename(path) for path in file_paths])
     self.file_ids = file_ids
コード例 #3
0
# Main module for analysing 1001 Genomes data
# Summary statistics
import logging
import h5py as h5
import numpy as np
import pandas as pd
import os.path
import glob
import sys
from . import run_bedtools
import itertools
import pybedtools as pybed

from pygenome import genome as g

# Module-level genome object for the "at_tair10" reference; it is built once,
# when this module is imported.
genome = g.GenomeClass("at_tair10")

# Logger named after this module.
log = logging.getLogger(__name__)


def die(msg):
    """Write 'Error: <msg>' and a newline to stderr, then exit with status 1.

    msg must be a string. The function never returns; it raises SystemExit.
    """
    line = ''.join(('Error: ', msg, '\n'))
    sys.stderr.write(line)
    raise SystemExit(1)


class WriteHDF51001Table(object):
    """Holds the settings for converting an input file into an output file.

    NOTE(review): judging by the name, the output is an HDF5 table; the
    visible code only stores the arguments, so confirm against the rest of
    the class.
    """

    def __init__(self, input_file, output_file, chunk_size=1000):
        """Remember the input path, the output path and the chunk size.

        input_file: stored unchanged as self.input_file.
        output_file: stored unchanged as self.output_file.
        chunk_size: stored unchanged as self.chunk_size (default 1000).
        """
        self.input_file, self.output_file = input_file, output_file
        self.chunk_size = chunk_size
        # self.genome = g.ArabidopsisGenome(ref_genome)
コード例 #4
0
 def __init__(self, hdf5_file, ref_fasta="at_tair10", bin_bed=None):
     """
     Open an HDF5 file read-only and prepare the lookups built from it.

     hdf5_file: path opened with h5.File in 'r' mode and kept as self.h5file.
         The handle is not closed in this method.
     ref_fasta: value passed to genome.GenomeClass to build self.genome.
     bin_bed: passed to self.get_filter_inds; the result is stored as
         self.filter_pos_ix.
     """
     self.h5file = h5.File(hdf5_file, 'r')
     # Runs after the file is open, in case get_filter_inds reads self.h5file.
     self.filter_pos_ix = self.get_filter_inds(bin_bed)
     chrom_positions = self.h5file['chrpositions']
     self.chrpositions = np.array(chrom_positions)
     self.genome = genome.GenomeClass(ref_fasta)
コード例 #5
0
"""
Packaged functions to get the methylation levels using bedtools
"""
import os.path
import numpy as np
from subprocess import Popen, PIPE
import logging
import pandas as pd
import pybedtools as pybed

# Logger named after this module.
log = logging.getLogger(__name__)
from pygenome import genome
# Module-level genome object for the "at_tair10" reference; it is built once,
# when this module is imported.
tair10 = genome.GenomeClass("at_tair10")


def sort_bed_df(bed_df):
    """Return a strand-annotated, coordinate-sorted copy of a BED-like table.

    Columns labelled 0, 1 and 2 are renamed to "chr", "start" and "end".
    A "strand" column is added: "+" by default, "-" for rows where start is
    greater than end. On "-" rows start and end are swapped, so start <= end
    holds in the result. Rows are then sorted by "chr" and "start", and the
    second and third columns (by position) are converted to integers.

    Parameters
    ----------
    bed_df : pandas.DataFrame
        Table whose first three columns hold chromosome, start and end. It
        must expose "start" and "end" columns after the rename, i.e. they
        are either labelled 0/1/2 or already named.

    Returns
    -------
    pandas.DataFrame
        A new frame; bed_df itself is not modified.

    Raises
    ------
    KeyError
        If there is no "start"/"end" column after the rename.
    ValueError
        If a coordinate cannot be converted to int (e.g. NaN).
    """
    # rename() returns a new frame, so the caller's data is left untouched.
    new_bed_df = bed_df.rename(columns={0: "chr", 1: "start", 2: "end"})
    new_bed_df['strand'] = "+"
    reversed_rows = new_bed_df['start'] > new_bed_df['end']
    new_bed_df.loc[reversed_rows, 'strand'] = '-'
    # .values strips the labels so the two columns really trade places
    # instead of being re-aligned by column name.
    new_bed_df.loc[reversed_rows, ['start', 'end']] = new_bed_df.loc[
        reversed_rows, ['end', 'start']].values
    new_bed_df = new_bed_df.sort_values(['chr', 'start'])
    # Replace the columns rather than writing through iloc: on pandas >= 2.0
    # an iloc assignment writes into the existing column in place, so a float
    # or object column would keep its dtype and the integer cast would be lost.
    for column in new_bed_df.columns[[1, 2]]:
        new_bed_df[column] = new_bed_df[column].astype(int)
    return new_bed_df


def identify_positions_given_names(in_file, araport11_file):
    if araport11_file is None: