コード例 #1
0
 def get_data(self):
     """Return the stored inputs and labels bundled via ``util.enum``.

     When ``self.strip_enclosing_dictionary`` is truthy, ``self.X`` and
     ``self.Y`` are each indexed by the first key that ``keys()`` yields
     and those entries are returned. Otherwise ``self.X`` and ``self.Y``
     are returned unchanged.
     """
     if not self.strip_enclosing_dictionary:
         return util.enum(X=self.X, Y=self.Y)
     # Unwrap X first, then Y, each by its first key.
     first_input_key = list(self.X.keys())[0]
     inputs = self.X[first_input_key]
     first_output_key = list(self.Y.keys())[0]
     outputs = self.Y[first_output_key]
     return util.enum(X=inputs, Y=outputs)
コード例 #2
0
 def get_data_for_eval(self):
     """Return the evaluation data as numpy arrays bundled via ``util.enum``.

     ``self.to_load_for_eval_x`` and ``self.to_load_for_eval_y`` are
     converted with ``np.array``. When ``self.wrap_in_keys`` is not None,
     each array is placed in a single-entry dict keyed by
     ``self.wrap_in_keys[0]`` (for X) and ``self.wrap_in_keys[1]`` (for Y).
     """
     if self.wrap_in_keys is None:
         x_arr = np.array(self.to_load_for_eval_x)
         y_arr = np.array(self.to_load_for_eval_y)
         return util.enum(X=x_arr, Y=y_arr)
     # Wrapped form: look up each key just before converting its array.
     x_key = self.wrap_in_keys[0]
     x_arr = np.array(self.to_load_for_eval_x)
     y_key = self.wrap_in_keys[1]
     y_arr = np.array(self.to_load_for_eval_y)
     return util.enum(X={x_key: x_arr}, Y={y_key: y_arr})
コード例 #3
0
 def get_data(self):
     """Drain one pass of the generator and return everything it yields.

     The generator from ``self.get_generator(loop_infinitely=False)`` must
     yield 3-tuples; the first two items of each tuple are collected and
     the third is discarded. The collected values are converted with
     ``np.array`` and, when ``self.wrap_in_keys`` is not None, wrapped in
     single-entry dicts keyed by ``self.wrap_in_keys[0]`` / ``[1]``.
     """
     # Keep only (x, y); the third element of each record is not used here.
     pairs = [(x, y) for (x, y, _unused_coor)
              in self.get_generator(loop_infinitely=False)]
     inputs = [pair[0] for pair in pairs]
     labels = [pair[1] for pair in pairs]
     if self.wrap_in_keys is None:
         x_arr = np.array(inputs)
         y_arr = np.array(labels)
         return util.enum(X=x_arr, Y=y_arr)
     x_key = self.wrap_in_keys[0]
     x_arr = np.array(inputs)
     y_key = self.wrap_in_keys[1]
     y_arr = np.array(labels)
     return util.enum(X={x_key: x_arr}, Y={y_key: y_arr})
コード例 #4
0
    def get_data_for_eval(self):
        """Gather an evaluation subset from every mode of ``self.X``/``self.Y``.

        For each mode, two index windows are sliced and concatenated along
        axis 0: the items immediately preceding ``self.start_index`` (at
        most ``self.num_to_load_for_eval`` of them), followed by any
        shortfall taken from the end of the data. The result is returned
        via ``util.enum(X=..., Y=...)``.

        When ``self.strip_enclosing_dictionary`` is truthy, the concatenated
        array itself is returned instead of a dict keyed by mode name; with
        several modes the array of the last mode iterated is the one kept.

        NOTE(review): if ``num_to_load_for_eval`` is larger than
        ``num_items`` the two windows can overlap, and the second window's
        start can become negative -- confirm callers never hit that case.
        """
        wanted = self.num_to_load_for_eval
        # Window 1: the items immediately preceding the current start_index.
        preceding = slice(max(self.start_index - wanted, 0), self.start_index)
        # Window 2: any leftover taken from the end (presumably last seen).
        shortfall = max(wanted - self.start_index, 0)
        trailing = slice(self.num_items - shortfall, self.num_items)

        def _collect(per_mode):
            # Starts as a dict; replaced by a bare array when stripping.
            gathered = {}
            for mode_name in per_mode:
                merged = np.concatenate(
                    [per_mode[mode_name][preceding],
                     per_mode[mode_name][trailing]],
                    axis=0)
                if self.strip_enclosing_dictionary:
                    gathered = merged
                else:
                    gathered[mode_name] = merged
            return gathered

        return util.enum(X=_collect(self.X), Y=_collect(self.Y))
コード例 #5
0
 def get_data(self):
     """Run one pass of the fasta batch generator and return all of it.

     Batches come from ``get_fasta_batch_generator`` configured from this
     object's settings with ``loop_infinitely=False``. Each yielded item
     must be a 3-tuple; the first two elements are flattened into the
     input and label lists and the third is ignored. The lists are
     converted with ``np.array`` and, when ``self.wrap_in_keys`` is not
     None, wrapped in single-entry dicts keyed by ``self.wrap_in_keys[0]``
     and ``self.wrap_in_keys[1]``.
     """
     generator_options = dict(
         batch_size=self.batch_size,
         ref_fasta=self.ref_fasta,
         bed_source=self.bed_source,
         rc_augment=self.rc_augment,
         loop_infinitely=False,
         read_in_order=self.read_in_order)
     inputs, labels = [], []
     for (x_batch, y_batch, _unused_bed_entries) in \
             get_fasta_batch_generator(**generator_options):
         inputs.extend(x_batch)
         labels.extend(y_batch)
     if self.wrap_in_keys is None:
         x_arr = np.array(inputs)
         y_arr = np.array(labels)
         return util.enum(X=x_arr, Y=y_arr)
     x_key = self.wrap_in_keys[0]
     x_arr = np.array(inputs)
     y_key = self.wrap_in_keys[1]
     y_arr = np.array(labels)
     return util.enum(X={x_key: x_arr}, Y={y_key: y_arr})
コード例 #6
0
    def get_data_for_eval(self):
        """Return the evaluation data as numpy arrays bundled via ``util.enum``.

        With ``self.wrap_in_keys`` set, X and Y are each wrapped in a
        single-entry dict keyed by ``self.wrap_in_keys[0]`` / ``[1]`` and
        only X and Y are returned. Without it, the bare arrays are returned
        together with ``coors`` (``self.to_load_for_eval_coors``) and
        ``fastastr`` (``self.to_load_for_eval_fastastrs``), after asserting
        that X and Y have the same length.
        """
        if self.wrap_in_keys is None:
            # An earlier debugging trace here printed the lengths of the x/y
            # buffers around repeated np.array calls while studying a
            # "weird side-effect". The conversions below are intentionally
            # kept as separate calls in their original order.
            assert len(np.array(self.to_load_for_eval_x)) == len(
                np.array(self.to_load_for_eval_y))
            x_arr = np.array(self.to_load_for_eval_x)
            y_arr = np.array(self.to_load_for_eval_y)
            return util.enum(X=x_arr,
                             Y=y_arr,
                             coors=self.to_load_for_eval_coors,
                             fastastr=self.to_load_for_eval_fastastrs)
        x_key = self.wrap_in_keys[0]
        x_arr = np.array(self.to_load_for_eval_x)
        y_key = self.wrap_in_keys[1]
        y_arr = np.array(self.to_load_for_eval_y)
        return util.enum(X={x_key: x_arr}, Y={y_key: y_arr})
コード例 #7
0
from __future__ import print_function
import sys
import os
from collections import namedtuple, OrderedDict, defaultdict
import itertools
import numpy as np
import avutils.file_processing as fp
import avutils.util as av_util
from avutils.dynamic_enum import Key, Keys
import h5py

# A content type is a name plus the function used to cast values to it.
ContentType = namedtuple('ContentType', 'name casting_function')
ContentTypes = av_util.enum(
    integer=ContentType(name="int", casting_function=int),
    floating=ContentType(name="float", casting_function=float),
    string=ContentType(name="str", casting_function=str),
)

# Content type name -> casting function, built from ContentTypes.vals.
ContentTypesLookup = {
    content_type.name: content_type.casting_function
    for content_type in ContentTypes.vals
}

#TODO: implement weights
RootKeys = Keys(
    Key("features"),
    Key("labels"),
    Key("splits"),
    Key("weights"),
)

###
#Features Keys
###
# Mode names used when none is given explicitly.
DefaultModeNames = av_util.enum(
    labels="default_output_mode_name",
    features="default_input_mode_name",
)
FeaturesKeys = Keys(
    Key("features_format"),
    Key("opts"),
    Key("input_mode_name", default=DefaultModeNames.features),
)
FeatureSetYamlKeys_Columns = Keys(
    Key("file_names"), Key("content_type", default=ContentTypes.floating.name),
コード例 #8
0
        np.transpose(arr, (0, 2, 1)) for arr in [predictions, true_y]
    ]
    #reshape
    predictions, true_y = [
        np.reshape(arr, (-1, 4)) for arr in [predictions, true_y]
    ]
    #clip
    predictions = np.clip(predictions, (10**-6), (1 - (10**-6)))
    #compute categ crossentropy
    return [-np.mean(np.sum(true_y * np.log(predictions), axis=-1))]


# Supported accuracy statistics; each member's value is its own name.
AccuracyStats = util.enum(**{
    stat_name: stat_name for stat_name in (
        "auROC",
        "auPRC",
        "balanced_accuracy",
        "unbalanced_accuracy",
        "spearman_corr",
        "pearson_corr",
        "mean_squared_error",
        "onehot_rows_crossent",
    )
})

# Statistic name -> function that computes it.
compute_func_lookup = dict([
    (AccuracyStats.auROC, auroc_func),
    (AccuracyStats.auPRC, auprc_func),
    (AccuracyStats.balanced_accuracy, balanced_accuracy),
    (AccuracyStats.unbalanced_accuracy, unbalanced_accuracy),
    (AccuracyStats.spearman_corr, spearman_corr),
    (AccuracyStats.pearson_corr, pearson_corr),
    (AccuracyStats.mean_squared_error, mean_squared_error),
    (AccuracyStats.onehot_rows_crossent, onehot_rows_crossent_func),
])
is_larger_better_lookup = {
    AccuracyStats.auROC: True,
コード例 #9
0
import sys
import os
from collections import namedtuple, OrderedDict, defaultdict
import itertools
import numpy as np
import avutils.file_processing as fp
import avutils.util as av_util
from avutils.dynamic_enum import Key, Keys
import h5py

# A content type is a name plus the function used to cast values to it.
ContentType = namedtuple('ContentType', 'name casting_function')
ContentTypes = av_util.enum(
    integer=ContentType(name="int", casting_function=int),
    floating=ContentType(name="float", casting_function=float),
    string=ContentType(name="str", casting_function=str),
)

# Content type name -> casting function, built from ContentTypes.vals.
ContentTypesLookup = {
    content_type.name: content_type.casting_function
    for content_type in ContentTypes.vals
}

#TODO: implement weights
RootKeys = Keys(
    Key("features"),
    Key("labels"),
    Key("splits"),
    Key("weights"),
)

###
#Features Keys
###
# Recognised values for the "features_format" key.
FeaturesFormat = av_util.enum(
    rows_and_columns='rows_and_columns',
    fasta='fasta',
)
# Mode names used when none is given explicitly.
DefaultModeNames = av_util.enum(
    labels="default_output_mode_name",
    features="default_input_mode_name",
)
FeaturesKeys = Keys(
    Key("features_format"),
    Key("opts"),
    Key("input_mode_name", default=DefaultModeNames.features),
)
FeatureSetYamlKeys_RowsAndCols = Keys(