Ejemplo n.º 1
0
                # Turn the label into a sequence of per-feature values.
                # Presumably position x corresponds to featnamearray[x] --
                # TODO confirm against getArrayFromPattern's definition.
                dataarray = getArrayFromPattern(label)

                # Record every observed value under its feature name. The
                # stored 1 only marks the value as seen.
                for x in xrange(len(dataarray)):
                    featdict[featnamearray[x]][dataarray[x]] = 1

    input_dim_v = 0
    for featname, featvaluedict in featdict.items():
        i = 0
        flag = 0
        if feattypedict[featname] != 1:
            continue
        for key in sorted(featvaluedict.keys()):
            if key != 'xx':
                featvaluedict[key] = i
                i += 1
            else:
                flag = 1
        if flag == 1:
            featvaluedict['xx'] = i
            i += 1
        input_dim_v += i

    print "input vector dimension: %d" % input_dim_v
    # pprint.pprint(featdict)
    # Save the dictionary information: the feature-name list, the
    # feature-type table and the per-feature value dictionaries are pickled
    # together as one tuple.
    # The with-block closes the file even when pickling raises.
    with open('featdict.pkl', 'wb') as output:
        sPickle.dump((featnamearray, feattypedict, featdict), output)
Ejemplo n.º 2
0
                    # Parse every field after the first as float32.
                    tmp_N = np.array(lines[1:], dtype='float32')
                    # Standardise with X_mean_N / X_std_N, which are defined
                    # outside this excerpt.
                    tmp_N = (tmp_N - X_mean_N) / X_std_N
                    X_tmp_N.append(tmp_N)

        # With silence removal enabled, index the targets through idxarray
        # (presumably a boolean keep-mask -- confirm where it is built).
        if REMOVE_SIL is True:
            keep_mask = np.array(idxarray)
            Y_tmp = Y_tmp[keep_mask]

        # Targets and inputs of one file must have the same length.
        assert len(X_tmp_V) == len(Y_tmp)

        # Files numbered up to nTrain go to the training lists, the rest to
        # the test lists; each file is appended as one sequence.
        if fidx <= nTrain:
            dest_N, dest_V, dest_Y = X_train_N, X_train_V, Y_train
        else:
            dest_N, dest_V, dest_Y = X_test_N, X_test_V, Y_test
        dest_N.append(X_tmp_N)
        dest_V.append(X_tmp_V)
        dest_Y.append(list(Y_tmp))

    print(len(X_train_N), len(X_train_V), len(X_test_N),
          len(X_test_V), len(Y_train), len(Y_test))

    output = open('sequence.pkl', 'wb')
    sPickle.dump(X_train_N, output)
    sPickle.dump(X_train_V, output)
    sPickle.dump(X_test_N, output)
    sPickle.dump(X_test_V, output)
    sPickle.dump(Y_train, output)
    sPickle.dump(Y_test, output)
    output.close()
    print "end"
Ejemplo n.º 3
0
    # Replace the stored value of every feature value with a consecutive
    # index: values other than 'xx' are numbered in sorted order and 'xx',
    # when present, takes the index right after them.
    for featvaluedict in featdict.values():
        ordered_keys = sorted(k for k in featvaluedict if k != 'xx')
        if 'xx' in featvaluedict:
            ordered_keys.append('xx')
        for slot, key in enumerate(ordered_keys):
            featvaluedict[key] = slot

    #pprint.pprint(featdict)

    # Persist the value-to-index dictionaries. The with-block closes the
    # file even when pickling raises.
    with open('featdict.pkl', 'wb') as output:
        sPickle.dump(featdict, output)

    # Build one concatenated one-hot vector per data row. Only features
    # whose type is 1 contribute; all other features are skipped.
    featurearrays = []
    for dataarray in dataarrays:
        featurearray = []
        for x, value in enumerate(dataarray):
            name = featnamearray[x]
            if feattypedict[name] != 1:
                continue
            indexdict = featdict[name]
            # One slot per known value of this feature, with the slot of the
            # observed value set to 1.
            vector = [0] * len(indexdict)
            vector[indexdict[value]] = 1
            # extend() grows the list in place, so no full copy of the
            # accumulated row is made for every feature.
            featurearray.extend(vector)
        featurearrays.append(featurearray)

    # Convert numberfeatures to a float32 NumPy array.
    # NOTE(review): numberfeatures is not defined anywhere in this excerpt,
    # and the featurearrays list built just above is not used here --
    # confirm against the full file.
    numberfeatures = numpy.array(numberfeatures, dtype=numpy.float32)
Ejemplo n.º 4
0
            X_tmp = []
            # Read the targets as raw float32 values and arrange them into
            # rows of OUTPUT_DIM columns.
            Y_tmp = np.fromfile(cmpfile, dtype='float32').reshape(-1, OUTPUT_DIM)
            with open(labfile) as fp:
                for line in fp:
                    fields = line.strip().split()
                    # Stop reading at the first line with fewer than three
                    # fields.
                    if len(fields) < 3:
                        break
                    # A line is dropped only when silence removal is on and
                    # its first field contains "-sil+". idxarray records one
                    # keep/drop flag per line read.
                    drop = REMOVE_SIL is True and "-sil+" in fields[0]
                    idxarray.append(not drop)
                    if not drop:
                        X_tmp.append(fields[1:])

            # One flag per target row is expected.
            assert len(Y_tmp) == len(idxarray)
            # NOTE(review): the assignment to Y_tmp below stops right after
            # the opening parenthesis, so this excerpt does not parse as
            # written. The right-hand side has to be restored from the
            # original file; it is not guessed at here.
            Y_tmp = ( 
            Y_train += Y_tmp
            X_train += X_tmp
   
    X_mean = X_train.mean(axis=0)
    X_std = X_train.std(axis=0)
    
    Y_mean = Y_train.mean(axis=0)
    Y_std = Y_train.std(axis=0)

    output = open('Normalize.pkl', 'wb')
    sPickle.dump((X_mean, X_std, Y_mean, Y_std), output)
    output.close()
    print "end"
Ejemplo n.º 5
0
            # Add this file's entries to the test-set accumulators. The
            # targets are converted to a list first.
            X_test_V += X_tmp_V
            Y_test += list(Y_tmp)

    # Convert the accumulated containers to NumPy arrays. Only the *_N
    # inputs and the targets are forced to float32.
    X_train_N = np.array(X_train_N, dtype=np.float32)
    X_test_N = np.array(X_test_N, dtype=np.float32)
    X_train_V = np.array(X_train_V)
    X_test_V = np.array(X_test_V)
    Y_train = np.array(Y_train, dtype='float32')
    Y_test = np.array(Y_test, dtype='float32')

    # The statistics are computed along axis 0 of the training split only.
    X_mean_N = X_train_N.mean(axis=0)
    X_std_N = X_train_N.std(axis=0)
    Y_mean = Y_train.mean(axis=0)
    Y_std = Y_train.std(axis=0)

    # Save the statistics as one tuple. The with-block closes the file
    # explicitly, so the handle is never left to garbage collection.
    with open("normalize.pkl", "wb") as stats_file:
        sPickle.dump((X_mean_N, X_std_N, Y_mean, Y_std), stats_file)

    # Standardise both splits with the training statistics.
    # NOTE(review): a column with zero standard deviation would divide by
    # zero here -- confirm the data rules that out.
    X_train_N = (X_train_N - X_mean_N) / X_std_N
    X_test_N = (X_test_N - X_mean_N) / X_std_N
    Y_train = (Y_train - Y_mean) / Y_std
    Y_test = (Y_test - Y_mean) / Y_std

    # Shuffle the two training input arrays in place along their first axis.
    # NOTE(review): the seed is set only between the two shuffles, so both
    # arrays get the same permutation only if the generator was already in
    # the SEED state right before the first shuffle -- confirm.
    # NOTE(review): Y_train is not shuffled anywhere in this excerpt; verify
    # that the targets still line up with the shuffled inputs.
    np.random.shuffle(X_train_V)
    np.random.seed(SEED)  # reset seeds
    np.random.shuffle(X_train_N)

    # Report the final array shapes.
    print(X_train_N.shape, X_train_V.shape, X_test_N.shape, X_test_V.shape,
          Y_train.shape, Y_test.shape)

    output = open('data.pkl', 'wb')
    sPickle.dump(X_train_N, output)