def get_data(self):
    """Return ``util.enum(X=..., Y=...)`` built from ``self.X`` and ``self.Y``.

    When ``self.strip_enclosing_dictionary`` is truthy, only the value stored
    under the first key of each container is returned; otherwise ``self.X``
    and ``self.Y`` are passed through unchanged.
    """
    if not self.strip_enclosing_dictionary:
        return util.enum(X=self.X, Y=self.Y)
    # "first" means the first entry produced by keys() for each container
    first_input_key = list(self.X.keys())[0]
    stripped_x = self.X[first_input_key]
    first_output_key = list(self.Y.keys())[0]
    stripped_y = self.Y[first_output_key]
    return util.enum(X=stripped_x, Y=stripped_y)
def get_data_for_eval(self):
    """Return the buffered evaluation items as numpy arrays.

    ``self.to_load_for_eval_x`` and ``self.to_load_for_eval_y`` are each
    converted with ``np.array``. If ``self.wrap_in_keys`` is not None, the X
    array is placed in a dict under ``wrap_in_keys[0]`` and the Y array in a
    dict under ``wrap_in_keys[1]``; otherwise the bare arrays are returned.
    The result is packaged with ``util.enum(X=..., Y=...)``.
    """
    eval_x = np.array(self.to_load_for_eval_x)
    eval_y = np.array(self.to_load_for_eval_y)
    if self.wrap_in_keys is None:
        return util.enum(X=eval_x, Y=eval_y)
    x_key = self.wrap_in_keys[0]
    y_key = self.wrap_in_keys[1]
    return util.enum(X={x_key: eval_x}, Y={y_key: eval_y})
def get_data(self):
    """Collect one full, non-looping pass of ``self.get_generator``.

    Every item yielded by the generator is unpacked as an ``(x, y, coor)``
    triple; the x and y parts are accumulated and converted with
    ``np.array``, while the third element is ignored. If
    ``self.wrap_in_keys`` is not None the arrays are wrapped in
    single-entry dicts keyed by ``wrap_in_keys[0]`` (X) and
    ``wrap_in_keys[1]`` (Y). The result is ``util.enum(X=..., Y=...)``.
    """
    inputs, outputs = [], []
    # loop_infinitely=False so that the iteration below terminates
    for x_item, y_item, _coor in self.get_generator(loop_infinitely=False):
        inputs.append(x_item)
        outputs.append(y_item)

    if self.wrap_in_keys is None:
        return util.enum(X=np.array(inputs), Y=np.array(outputs))
    return util.enum(X={self.wrap_in_keys[0]: np.array(inputs)},
                     Y={self.wrap_in_keys[1]: np.array(outputs)})
def get_data_for_eval(self):
    """Return the ``self.num_to_load_for_eval`` items around ``start_index``.

    Two windows are sliced out of every entry of ``self.X`` and ``self.Y``
    and concatenated along axis 0:

    * the items immediately preceding ``self.start_index``;
    * any leftover, taken from the end of the data (presumably the items
      seen last).

    When ``self.strip_enclosing_dictionary`` is truthy the concatenated array
    itself is returned (the last mode iterated wins); otherwise a dict keyed
    by mode name is returned. If a container has no modes, an empty dict is
    returned for it. The result is packaged as ``util.enum(X=..., Y=...)``.
    """
    wanted = self.num_to_load_for_eval
    # window 1: take the items immediately preceding the current start_index
    preceding = slice(max(self.start_index - wanted, 0), self.start_index)
    # window 2: any leftover taken from the end (presumably last seen)
    # NOTE(review): if the leftover exceeds num_items the start below goes
    # negative and the slice is then counted from the end - confirm this
    # cannot happen for the configured num_to_load_for_eval.
    leftover = slice(self.num_items - max(wanted - self.start_index, 0),
                     self.num_items)

    def _gather(mode_to_data):
        # Concatenate both windows for every mode of one container.
        gathered = {}
        for mode_name in mode_to_data:
            before = mode_to_data[mode_name][preceding]
            wrapped = mode_to_data[mode_name][leftover]
            merged = np.concatenate([before, wrapped], axis=0)
            if self.strip_enclosing_dictionary:
                gathered = merged
            else:
                gathered[mode_name] = merged
        return gathered

    eval_x = _gather(self.X)
    eval_y = _gather(self.Y)
    return util.enum(X=eval_x, Y=eval_y)
def get_data(self):
    """Collect one full, non-looping pass of ``get_fasta_batch_generator``.

    The generator is configured from this object's ``batch_size``,
    ``ref_fasta``, ``bed_source``, ``rc_augment`` and ``read_in_order``
    attributes. Each yielded ``(x_batch, y_batch, bed_entries)`` triple has
    its x and y batches appended item-by-item to flat lists (the bed entries
    are ignored), which are then converted with ``np.array``. If
    ``self.wrap_in_keys`` is not None the arrays are wrapped in single-entry
    dicts keyed by ``wrap_in_keys[0]`` (X) and ``wrap_in_keys[1]`` (Y).
    The result is ``util.enum(X=..., Y=...)``.
    """
    batches = get_fasta_batch_generator(
        batch_size=self.batch_size,
        ref_fasta=self.ref_fasta,
        bed_source=self.bed_source,
        rc_augment=self.rc_augment,
        loop_infinitely=False,  # a single pass, so the loop below ends
        read_in_order=self.read_in_order)

    inputs, outputs = [], []
    for batch_x, batch_y, _bed_entries in batches:
        inputs.extend(batch_x)
        outputs.extend(batch_y)

    if self.wrap_in_keys is None:
        return util.enum(X=np.array(inputs), Y=np.array(outputs))
    return util.enum(X={self.wrap_in_keys[0]: np.array(inputs)},
                     Y={self.wrap_in_keys[1]: np.array(outputs)})
def get_data_for_eval(self):
    """Return the buffered evaluation items as numpy arrays.

    ``self.to_load_for_eval_x`` and ``self.to_load_for_eval_y`` are each
    converted with ``np.array`` exactly once and the results are reused.

    Returns:
        * If ``self.wrap_in_keys`` is not None:
          ``util.enum(X={wrap_in_keys[0]: x}, Y={wrap_in_keys[1]: y})``.
        * Otherwise: ``util.enum(X=x, Y=y, coors=..., fastastr=...)`` where
          ``coors`` and ``fastastr`` are ``self.to_load_for_eval_coors`` and
          ``self.to_load_for_eval_fastastrs`` passed through as-is.

    Raises:
        AssertionError: on the unwrapped path, if the X and Y arrays differ
            in length.
    """
    eval_x = np.array(self.to_load_for_eval_x)
    eval_y = np.array(self.to_load_for_eval_y)

    if self.wrap_in_keys is not None:
        return util.enum(X={self.wrap_in_keys[0]: eval_x},
                         Y={self.wrap_in_keys[1]: eval_y})

    # The X and Y buffers are expected to stay in lockstep; only the
    # unwrapped path checks this, as before.
    assert len(eval_x) == len(eval_y)
    return util.enum(X=eval_x,
                     Y=eval_y,
                     coors=self.to_load_for_eval_coors,
                     fastastr=self.to_load_for_eval_fastastrs)
from __future__ import print_function import sys import os from collections import namedtuple, OrderedDict, defaultdict import itertools import numpy as np import avutils.file_processing as fp import avutils.util as av_util from avutils.dynamic_enum import Key, Keys import h5py ContentType = namedtuple('ContentType', ['name', 'casting_function']) ContentTypes = av_util.enum(integer=ContentType("int", int), floating=ContentType("float", float), string=ContentType("str", str)) ContentTypesLookup = dict( (x.name, x.casting_function) for x in ContentTypes.vals) #TODO: implement weights RootKeys = Keys(Key("features"), Key("labels"), Key("splits"), Key("weights")) ### #Features Keys ### DefaultModeNames = av_util.enum(labels="default_output_mode_name", features="default_input_mode_name") FeaturesKeys = Keys(Key("features_format"), Key("opts"), Key("input_mode_name", default=DefaultModeNames.features)) FeatureSetYamlKeys_Columns = Keys( Key("file_names"), Key("content_type", default=ContentTypes.floating.name),
np.transpose(arr, (0, 2, 1)) for arr in [predictions, true_y] ] #reshape predictions, true_y = [ np.reshape(arr, (-1, 4)) for arr in [predictions, true_y] ] #clip predictions = np.clip(predictions, (10**-6), (1 - (10**-6))) #compute categ crossentropy return [-np.mean(np.sum(true_y * np.log(predictions), axis=-1))] AccuracyStats = util.enum(auROC="auROC", auPRC="auPRC", balanced_accuracy="balanced_accuracy", unbalanced_accuracy="unbalanced_accuracy", spearman_corr="spearman_corr", pearson_corr="pearson_corr", mean_squared_error="mean_squared_error", onehot_rows_crossent="onehot_rows_crossent") compute_func_lookup = { AccuracyStats.auROC: auroc_func, AccuracyStats.auPRC: auprc_func, AccuracyStats.balanced_accuracy: balanced_accuracy, AccuracyStats.unbalanced_accuracy: unbalanced_accuracy, AccuracyStats.spearman_corr: spearman_corr, AccuracyStats.pearson_corr: pearson_corr, AccuracyStats.mean_squared_error: mean_squared_error, AccuracyStats.onehot_rows_crossent: onehot_rows_crossent_func } is_larger_better_lookup = { AccuracyStats.auROC: True,
import sys import os from collections import namedtuple, OrderedDict, defaultdict import itertools import numpy as np import avutils.file_processing as fp import avutils.util as av_util from avutils.dynamic_enum import Key, Keys import h5py ContentType = namedtuple('ContentType', ['name', 'casting_function']) ContentTypes = av_util.enum(integer=ContentType("int", int), floating=ContentType("float", float), string=ContentType("str", str)) ContentTypesLookup = dict( (x.name, x.casting_function) for x in ContentTypes.vals) #TODO: implement weights RootKeys = Keys(Key("features"), Key("labels"), Key("splits"), Key("weights")) ### #Features Keys ### FeaturesFormat = av_util.enum(rows_and_columns='rows_and_columns', fasta='fasta') DefaultModeNames = av_util.enum(labels="default_output_mode_name", features="default_input_mode_name") FeaturesKeys = Keys(Key("features_format"), Key("opts"), Key("input_mode_name", default=DefaultModeNames.features)) FeatureSetYamlKeys_RowsAndCols = Keys(