Exemple #1
0
    def get_batch_full(self, feat, files, augment=False):
        """Assemble one padded feature batch from ``(signal, rate)`` pairs.

        Every pair in ``feat`` first goes through ``self.endpoint_detect``;
        the result is then fed to ``self.feature_extract_mfcc`` and, when
        ``cfg.use_pitch`` / ``cfg.use_timefeat`` are set, to the pitch and
        time-space extractors as well.  The i-th feature stream of every
        sample is grouped together, padded with ``self.pad_batch``, wrapped
        by ``cvar``, has its first two axes swapped and is finally
        concatenated with the other streams along axis 2.

        Args:
            feat: sequence of ``(signal, rate)`` pairs; it is traversed twice.
            files: not used by this method.
            augment: forwarded to ``self.endpoint_detect``.

        Returns:
            A tuple ``(inp, lengths)``: the concatenated tensor and a numpy
            array holding, per sample, the second value returned by
            ``self.feature_extract_mfcc``.
        """
        trimmed = [
            self.endpoint_detect(signal, sample_rate, augment=augment)
            for signal, sample_rate in feat
        ]
        rates = [pair[1] for pair in feat]

        per_sample = []
        lengths = []
        for clip, sample_rate in zip(trimmed, rates):
            # The original author annotated the MFCC output as [T,H].
            mfcc_streams, n_frames = self.feature_extract_mfcc(clip, sample_rate)
            lengths.append(n_frames)

            streams = list(mfcc_streams)
            if cfg.use_pitch:
                streams.extend(self.feature_extract_pitch(clip, sample_rate))
            if cfg.use_timefeat:
                streams.extend(self.feature_extract_timespace(clip, sample_rate))
            per_sample.append(streams)

        # Regroup from "per sample, all streams" to "per stream, all samples".
        per_stream = list(zip(*per_sample))

        padded = [
            cvar(self.pad_batch(stream)).transpose(0, 1) for stream in per_stream
        ]
        return torch.cat(padded, 2), np.array(lengths)
 def __get_features_from_dict(self, label, f_dict, ref_dict):
     """Collect the evaluated feature entries for every record of ``label``.

     ``ref_dict[label]`` supplies 5-tuples
     ``(tfile, nfile, rfile, chromosome, pos)``.  Each tuple is turned into
     the lookup key ``"rfile;nfile;tfile;chromosome;pos"`` and the first
     element of ``f_dict[key]`` is evaluated and appended to the result.

     Args:
         label: key into ``ref_dict``.
         f_dict: mapping from the ';'-joined key to a sequence whose first
             element is a string holding a Python expression.
         ref_dict: mapping from label to an iterable of 5-tuples of strings.

     Returns:
         List of evaluated features.  Records whose key is missing from
         ``f_dict`` are logged and skipped; an unknown ``label`` is logged
         and yields an empty list instead of failing on ``None``.
     """
     records = ref_dict.get(label)
     if records is None:
         logging.error('error: cannot find label "%s"\n', label)
         return []

     features = []
     for tfile, nfile, rfile, chromosome, pos in records:
         key = ';'.join([rfile, nfile, tfile, chromosome, pos])
         # Only the lookup is guarded, so a KeyError raised while evaluating
         # the expression is not misreported as a missing key.
         try:
             raw_feature = f_dict[key][0]
         except KeyError:
             logging.error('error: cannot find key "%s"\n', key)
             continue
         # NOTE(review): eval() executes arbitrary code. This is only safe
         # if f_dict comes from a trusted source; if the stored values are
         # plain literals, ast.literal_eval would be the safer choice.
         features.append(eval(raw_feature))
     return features
 def __get_features_from_feature_db(self, feature_dict, label):
     """Return the evaluated features of every entry tagged with ``label``.

     Args:
         feature_dict: mapping whose values are sequences with a string
             holding a Python expression at index 0 and the entry's label
             at index 1.  The keys are not used.
         label: entries whose ``value[1]`` equals this are selected.

     Returns:
         List of evaluated ``value[0]`` expressions, in the iteration order
         of ``feature_dict``.
     """
     features = []
     # .values() exists on both Python 2 and Python 3 dicts, whereas the
     # original .iteritems() is Python 2 only.
     for value in feature_dict.values():
         if label == value[1]:
             # NOTE(review): eval() executes arbitrary code. This is only
             # safe if feature_dict comes from a trusted source; if the
             # values are plain literals, prefer ast.literal_eval.
             features.append(eval(value[0]))
     return features
Exemple #4
0
def load_features(feature_file):
    """Parse ``key=value`` tokens from every line of a text feature file.

    Lines come from ``read_lines(feature_file)``.  Each line is split on
    whitespace and each token on ``'='``: the first piece becomes the key and
    the second piece the value, so anything after a second ``'='`` is
    dropped and a token without ``'='`` raises ``IndexError``.

    Returns:
        A list with one ``dict`` of string keys to string values per line.
    """
    def parse_line(line):
        record = {}
        for token in line.strip().split():
            pieces = token.split('=')
            record[pieces[0]] = pieces[1]
        return record

    return [parse_line(line) for line in read_lines(feature_file)]
Exemple #5
0
def naive_features(data):
    """Build ``(tags, label)`` pairs from the Tag1..Tag5 columns of ``data``.

    For every row index ``0 .. len(data) - 1`` the label is read from the
    ``OpenStatus`` column and every non-null tag value becomes a key mapped
    to ``1`` in that row's tag dictionary.

    Returns:
        A list of ``(dict, label)`` tuples, one per row.
    """
    tag_columns = ['Tag%d' % n for n in range(1, 6)]
    samples = []
    for row in range(len(data)):
        status = data['OpenStatus'][row]
        cell_values = [data[column][row] for column in tag_columns]
        present = {value: 1 for value in cell_values if not pd.isnull(value)}
        samples.append((present, status))
    return samples
Exemple #6
0
def naive_features(data):
    """Turn each row of ``data`` into a ``(tag_set, label)`` training pair.

    The label comes from the ``OpenStatus`` column; the tag set is a dict
    mapping each non-null value found in columns ``Tag1`` .. ``Tag5`` to 1.
    Rows are addressed by the integers ``0 .. len(data) - 1``.
    """
    columns = ['Tag%d' % number for number in range(1, 6)]
    pairs = []
    for idx in range(len(data)):
        bag = {}
        outcome = data['OpenStatus'][idx]
        for column in columns:
            cell = data[column][idx]
            if pd.isnull(cell):
                # Missing tag slot: nothing to record.
                continue
            bag[cell] = 1
        pairs.append((bag, outcome))
    return pairs
Exemple #7
0
    def plot_feature(self, feature_name, plot_all=False):
        """Plot a two-column feature in 2D, colour-coded by class.

        Args:
            feature_name: either a string, which is handed to
                ``self.get_feature``, or a callable that is applied to every
                entry of ``self.data`` and whose results are stacked with
                ``np.vstack``.
            plot_all: when true, every sample of a class is scattered in that
                class's colour; otherwise one marker per class is drawn at
                the NaN-ignoring mean, with the NaN-ignoring standard
                deviation as error bars.

        Raises:
            ValueError: if the feature matrix is not two-dimensional with
                exactly two columns.  (The original bare ``raise`` had no
                active exception to re-raise and so surfaced as an unrelated
                error.)
        """
        colors = ['dimgray', 'red', 'brown', 'darkgray', 'yellow', 'palegreen',
                  'seagreen', 'deepskyblue', 'navy', 'deeppink']
        genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz',
                  'metal', 'pop', 'reggae', 'rock']

        # Column vector of class assignments; class ids are compared against
        # 1..len(colors) below.
        classes = self.get_feature('class')

        if isinstance(feature_name, str):
            # Basic features with a string name come straight from get_feature.
            feature_vector = self.get_feature(feature_name)
        else:
            # Otherwise feature_name is a function of a single data entry;
            # stack its per-entry results into one matrix.
            feature_vector = np.vstack(
                [feature_name(self.data[i]) for i in range(len(self.data))])

        shape = np.shape(feature_vector)
        if len(shape) != 2 or shape[1] != 2:
            raise ValueError(
                'plot_feature needs an (N, 2) feature matrix, got shape %s'
                % (shape,))

        plt.figure(1)
        plot_handles = []
        for class_id, color in enumerate(colors, start=1):
            # Rows belonging to this class.
            subset = feature_vector[classes[:, 0] == class_id, :]
            if plot_all:
                handle = plt.scatter(subset[:, 0], subset[:, 1], c=color)
            else:
                handle = plt.errorbar(
                    np.nanmean(subset[:, 0]), np.nanmean(subset[:, 1]),
                    xerr=np.nanstd(subset[:, 0]), yerr=np.nanstd(subset[:, 1]),
                    mfc=color, mec=color, ecolor=color,
                    fmt='o')
            plot_handles.append(handle)

        plt.title('2D Feature space by genre')
        plt.xlabel('Parameter 1')
        plt.ylabel('Parameter 2')
        plt.legend(plot_handles, genres)
        plt.show()
def convert_data(data_name, feature_list, max_features, dataset):
    """Read a labelled text file and convert it to feature and label arrays.

    For each line of ``data_name`` the label is the integer value of the
    line's first character plus one.  The line is tokenised with
    ``line_to_words(line, dataset)`` and every extractor in ``feature_list``
    contributes ``feat.sentenceToFeatures(sentence)``; the contributions are
    concatenated in ``feature_list`` order and handed to
    ``pad_sentence(..., max_features)``.

    Args:
        data_name: path of the text file to read.
        feature_list: objects exposing ``sentenceToFeatures(sentence)``.
        max_features: forwarded to ``pad_sentence``.
        dataset: forwarded to ``line_to_words``.

    Returns:
        ``(features, labels)`` as two ``np.int32`` arrays.
    """
    features = []
    lbl = []

    with open(data_name, 'r') as f:
        for line in f:
            lbl.append(int(line[0]) + 1)

            sentence = line_to_words(line, dataset)
            # Plain extend() replaces reduce(lambda l1, l2: l1 + l2, ...):
            # reduce is not a builtin on Python 3, and repeated list
            # addition copies the accumulated list on every step.
            sentence_features = []
            for feat in feature_list:
                sentence_features.extend(feat.sentenceToFeatures(sentence))
            # pad_sentence's return value is ignored, so it presumably pads
            # the list in place - confirm against its definition.
            pad_sentence(sentence_features, max_features)
            features.append(sentence_features)

    return np.array(features, dtype=np.int32), np.array(lbl, dtype=np.int32)
    def __feature_importance_bar_plot(self, fig, subplot_pos):
        """Draw a bar chart of ranked feature importances on ``fig``.

        The file at ``self.args.ranked_features`` is read line by line; the
        first whitespace-separated field is the feature name (underscores are
        shown as spaces) and the second is its importance as a float.

        Args:
            fig: object providing ``add_subplot``; the chart is drawn on the
                axes it returns.
            subplot_pos: three-element sequence passed to ``add_subplot``.
        """
        features = []
        importance = []
        # The context manager closes the file even if a line fails to parse;
        # the original never closed the handle.
        with open(self.args.ranked_features, 'r') as file_stream:
            for line in file_stream:
                fields = line.strip().split()
                features.append(fields[0].replace('_', ' '))
                importance.append(float(fields[1]))

        # range() works on Python 2 and 3; the original xrange() is Python 2 only.
        positions = list(range(len(importance)))

        ax = fig.add_subplot(subplot_pos[0], subplot_pos[1], subplot_pos[2])
        ax.bar(positions, importance)
        ax.set_xlabel('Features', fontsize=8)
        ax.set_xticks(numpy.arange(len(importance)))
        ax.set_xticklabels(features, rotation=90, fontsize=6)
        ax.set_ylabel('Importance', fontsize=8)
        ax.yaxis.set_tick_params(labelsize=8)
        ax.xaxis.set_tick_params(labelsize=6)
def convert_data(data_name, feature_list, max_features, dataset):
    """Convert a labelled text file into ``np.int32`` feature/label arrays.

    Each line yields one sample: its label is ``int(line[0]) + 1`` and its
    features are the outputs of every ``feat.sentenceToFeatures(sentence)``
    in ``feature_list`` (in order), where ``sentence`` is
    ``line_to_words(line, dataset)``.  The combined list is passed to
    ``pad_sentence`` together with ``max_features`` before being stored.

    Args:
        data_name: path of the text file to read.
        feature_list: objects exposing ``sentenceToFeatures(sentence)``.
        max_features: forwarded to ``pad_sentence``.
        dataset: forwarded to ``line_to_words``.

    Returns:
        ``(features, labels)`` as two ``np.int32`` arrays.
    """
    features = []
    lbl = []

    with open(data_name, 'r') as f:
        for line in f:
            y = int(line[0]) + 1
            lbl.append(y)

            sentence = line_to_words(line, dataset)
            # A flattening comprehension replaces the original reduce() fold:
            # reduce is not a builtin on Python 3, and folding with l1 + l2
            # re-copies the growing list for every extractor.
            sentence_features = [
                value
                for feat in feature_list
                for value in feat.sentenceToFeatures(sentence)
            ]
            # pad_sentence's return value is ignored, so it presumably pads
            # the list in place - confirm against its definition.
            pad_sentence(sentence_features, max_features)
            features.append(sentence_features)

    return np.array(features, dtype=np.int32), np.array(lbl, dtype=np.int32)
Exemple #11
0
def simple_extractors(dialogs_df, extractors):
    """Apply each extractor to the ``dialog`` column and join the results.

    Every extractor is called on each value of ``dialogs_df['dialog']`` and
    must return a sequence; the sequence is expanded into one column per
    element.  The first half of the columns is named for side ``self`` and
    the second half for side ``that``: ``<name>_<side>`` when there is one
    column per side, ``<name>_<side>_<i>`` (1-based) otherwise, where
    ``<name>`` is the extractor's ``__name__``.

    Args:
        dialogs_df: DataFrame with a ``dialog`` column.
        extractors: iterable of callables as described above.

    Returns:
        DataFrame with all generated columns side by side.  With no
        extractors an empty frame sharing ``dialogs_df``'s index is returned
        (``pd.concat`` rejects an empty list).
    """
    sides = ['self', 'that']
    features = []
    for ex in extractors:
        # The original passed a stray positional 1 here; Series.apply has no
        # axis argument and treated it as the deprecated convert_dtype flag,
        # whose default is equivalent.
        feat = dialogs_df['dialog'].apply(ex).apply(pd.Series)
        # Number of columns per side.
        feat_len = feat.shape[1] // 2
        if feat_len > 1:
            feat.columns = [
                '{ex}_{side}_{i}'.format(ex=ex.__name__, side=s, i=i)
                for s in sides for i in range(1, feat_len + 1)
            ]
        else:
            feat.columns = [
                '{ex}_{side}'.format(ex=ex.__name__, side=s)
                for s in sides
            ]
        features.append(feat)

    if not features:
        return pd.DataFrame(index=dialogs_df.index)

    return pd.concat(features, axis=1)
Exemple #12
0
def one_instance(params, feat, rep):
    """Run one repetition of the evolutionary loop and return its features.

    A population is created with ``evolve.init`` and its parents are scored
    with ``evolve.eval_string``.  A first measurement is taken through
    ``features.append`` at step 0; then, for ``params['iters']`` iterations,
    the population is varied, selected and measured again (at step
    ``i + 1``).  When ``params['debug']`` is truthy, ``evolve.check`` runs
    after every iteration.

    Returns:
        Whatever the last ``features.append`` call returned.
    """
    population = evolve.init(params)

    # Score the first generation (not strictly necessary).
    population['fitness'] = [
        evolve.eval_string(candidate, population, params)
        for candidate in population['parents']
    ]

    # Initial measurement; an alternative would be to do one round of
    # selection first.
    feats = features.append(population, feat, params, 0, rep)

    init_params = params.copy()  # kept as the reference for dynamic params
    rep_params = params.copy()  # the dynamics overwrite this copy as they go

    for step in range(params['iters']):
        evolve.variation(population, rep_params, step, init_params)
        evolve.select(population, rep_params)

        # Measure AFTER the update.
        feats = features.append(population, feat, rep_params, step + 1, rep)

        if params['debug']:
            evolve.check(population, rep_params)

    return feats