def __init__(self, ref_fasta_file, allc_file=None, output_file=None):
    """
    Initialise the instance with a reference genome and optionally
    process an allc file.

    ref_fasta_file is passed to genome.GenomeClass and the result is kept
    on self.fasta. When allc_file is given it is loaded with
    self.load_allc_file; when output_file is given as well, the result is
    written with self.write_h5_file.
    """
    self.fasta = genome.GenomeClass(ref_fasta_file)
    if allc_file is None:
        return
    self.load_allc_file(allc_file)
    # NOTE(review): writing is treated as dependent on an allc file having
    # been loaded (nested check) -- confirm against the original layout.
    if output_file is None:
        return
    self.write_h5_file(output_file)
def __init__(self, file_paths, file_ids=None, genome="at_tair10", load_all=True):
    """
    Set up a collection of methylation files.

    file_paths: sequence of paths; its length is stored as num_lines.
    file_ids:   identifiers for the files; when omitted, the base name of
                each path is used (as a numpy array).
    genome:     value handed to g.GenomeClass.
    load_all:   when true, the files are loaded right away through
                self.load_meths_files and kept on self.meths_list.
    """
    self.genome = g.GenomeClass(genome)
    self.num_lines = len(file_paths)
    self._file_path = file_paths
    if load_all:
        self.meths_list = self.load_meths_files(file_paths)
    # NOTE(review): file ids are assumed to be set regardless of load_all
    # -- confirm against the original layout.
    if file_ids is None:
        file_ids = np.array([op.basename(path) for path in file_paths])
    self.file_ids = file_ids
# Main module for analysing 1010 genomes data
# Summary statistics
import logging
import h5py as h5
import numpy as np
import pandas as pd
import os.path
import glob
import sys
from . import run_bedtools
import itertools
import pybedtools as pybed
from pygenome import genome as g

# Module-wide genome object and logger.
genome = g.GenomeClass("at_tair10")
log = logging.getLogger(__name__)


def die(msg):
    """
    Write "Error: <msg>" followed by a newline to stderr, then exit the
    interpreter with status 1.
    """
    text = 'Error: ' + msg + '\n'
    sys.stderr.write(text)
    sys.exit(1)


class WriteHDF51001Table(object):
    """
    Holds the input path, output path and chunk size for a table
    conversion.

    NOTE(review): presumably the rest of the class writes an HDF5 table
    (going by the class name); that logic is not visible here.
    """

    def __init__(self, input_file, output_file, chunk_size=1000):
        """
        Store the conversion settings on the instance.

        input_file:  path of the file to read.
        output_file: path of the file to write.
        chunk_size:  stored as-is on the instance (default 1000).
        """
        self.input_file = input_file
        self.output_file = output_file
        self.chunk_size = chunk_size
        # self.genome = g.ArabidopsisGenome(ref_genome)
def __init__(self, hdf5_file, ref_fasta="at_tair10", bin_bed=None):
    """
    Open an HDF5 file read-only and prepare the lookups built on it.

    hdf5_file: path opened with h5.File(..., 'r'); the handle is kept on
               self.h5file.
    ref_fasta: value handed to genome.GenomeClass (default "at_tair10").
    bin_bed:   passed to self.get_filter_inds; the result is stored as
               self.filter_pos_ix.

    The 'chrpositions' dataset is read into a numpy array and stored as
    self.chrpositions.

    If any step after opening the file raises, the file handle is closed
    before the exception propagates, so a failed construction does not
    leave the HDF5 file open.
    """
    self.h5file = h5.File(hdf5_file, 'r')
    try:
        self.filter_pos_ix = self.get_filter_inds(bin_bed)
        self.chrpositions = np.array(self.h5file['chrpositions'])
        self.genome = genome.GenomeClass(ref_fasta)
    except Exception:
        # Do not leak the open handle when initialisation fails half-way.
        self.h5file.close()
        raise
""" Pakaged functions to get the methyaltion levels using bed tools """ import os.path import numpy as np from subprocess import Popen, PIPE import logging import pandas as pd import pybedtools as pybed log = logging.getLogger(__name__) from pygenome import genome tair10 = genome.GenomeClass("at_tair10") def sort_bed_df(bed_df): new_bed_df = bed_df.copy() new_bed_df = new_bed_df.rename(columns={0: "chr", 1: "start", 2: "end"}) new_bed_df['strand'] = "+" new_bed_df.loc[new_bed_df['start'] > new_bed_df['end'], 'strand'] = '-' new_bed_df.loc[new_bed_df['strand'] == '-', ['start', 'end']] = new_bed_df.loc[ new_bed_df['strand'] == '-', ['end', 'start']].values new_bed_df = new_bed_df.sort_values(['chr', 'start']) new_bed_df.iloc[:, 1] = new_bed_df.iloc[:, 1].astype(int) new_bed_df.iloc[:, 2] = new_bed_df.iloc[:, 2].astype(int) return (new_bed_df) def identify_positions_given_names(in_file, araport11_file): if araport11_file is None: