Example #1
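Trains a Keras model on preprocessed data with early stopping and best-weights checkpointing, then reports test loss and accuracy.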
def train(model, dist, undist, args):
    # Hold out 25% of the data for evaluation.
    X_train, X_test, y_train, y_test = preproc.load_data(
        dist, undist, args.input_shape, 0.25)

    # Dashes instead of colons keep the checkpoint filename portable
    # (':' is not a valid filename character on Windows).
    current_time = time.strftime("%H-%M-%S", time.localtime())

    callbacks = [
        # Stop once val_loss fails to improve for two epochs.
        EarlyStopping(monitor='val_loss', patience=2),
        # Keep only the weights with the best val_loss seen so far.
        ModelCheckpoint(filepath='weights/{}_{}.h5'.format(
            args.model, current_time),
                        monitor='val_loss',
                        save_best_only=True,
                        mode='auto')
    ]

    model.fit(X_train,
              y_train,
              callbacks=callbacks,
              batch_size=args.batch_size,
              epochs=args.num_epochs,
              verbose=2,
              validation_data=(X_test, y_test))
    score = model.evaluate(X_test, y_test, verbose=2)
    print('Test Loss:', score[0])
    print('Test accuracy:', score[1])
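A minimal sketch of how this helper might be invoked. The attribute names on args are the ones the function actually reads; the values, and the model, dist, and undist objects, are assumptions about the surrounding project.

from types import SimpleNamespace

# Hypothetical settings; model, dist, and undist come from the project.
args = SimpleNamespace(model='blur', input_shape=[64, 64, 1],
                       batch_size=32, num_epochs=10)
train(model, dist, undist, args)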
Example #2
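Counts tag-to-tag transitions for a sequence tagger, including transitions out of START_TAG and into END_TAG.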
def get_tag_trans_counts(trainfile):
    """compute a dict of counters for tag transitions

    :param trainfile: name of file containing training data
    :returns: dict, in which keys are tags, and values are counters of succeeding tags
    :rtype: dict
    """
    tags_appeared = {START_TAG, END_TAG}
    tot_counts = defaultdict(Counter)
    total_transitions = defaultdict(float)
    title, X, Y = load_data(trainfile)
    for t in Y:
        # Count the transition out of the start state, every adjacent
        # tag pair, and the transition into the end state.
        total_transitions[(START_TAG, t[0])] += 1
        for i in range(len(t) - 1):
            total_transitions[(t[i], t[i + 1])] += 1
        total_transitions[(t[-1], END_TAG)] += 1
        tags_appeared.update(t)
    # Materialize a counter per tag; END_TAG has no successors.
    for tag_1 in tags_appeared:
        if tag_1 != END_TAG:
            for tag_2 in tags_appeared:
                tot_counts[tag_1][tag_2] = total_transitions[(tag_1, tag_2)]
    return dict(tot_counts)
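A small usage sketch, assuming a hypothetical training file name:

trans_counts = get_tag_trans_counts('train.txt')
# Tags that most often begin a sentence:
print(trans_counts[START_TAG].most_common(3))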
Example #3
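Builds emission counts: for each tag, a Counter over the words observed with that tag.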
def get_tag_word_counts(trainfile):
    """
    Produce a Counter of occurrences of each word for each tag

    Parameters:
    trainfile: -- the filename to be passed as argument to load_data
    :returns: -- a default dict of counters, where the keys are tags.
    """
    all_counters = defaultdict(Counter)
    title, X, Y = load_data(trainfile)
    for words, tags in zip(X, Y):
        for word, tag in zip(words, tags):
            all_counters[tag][word] += 1
    return all_counters
Example #4
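Builds a frequency-capped vocabulary with an UNK token and a word-to-index mapping.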
def get_word_to_ix(input_file, max_size=100000):
    """
    Creates a vocabulary of the most frequently occurring words, capped at
    max_size (UNK token included), and a dictionary that maps each word to
    a unique index.
    :returns: vocab, dict
    vocab: list of words in the vocabulary
    dict: maps word to unique index
    """
    vocab_counter = Counter()
    # load_data returns (title, X, Y), as in the other helpers here.
    title, X, Y = load_data(input_file)
    for word_list in X:
        vocab_counter.update(word_list)
    # Keep max_size - 1 words so the UNK token fits within max_size.
    vocab = [word for word, _ in vocab_counter.most_common(max_size - 1)]
    vocab.append(UNK)

    word_to_ix = {word: ix for ix, word in enumerate(vocab)}

    return vocab, word_to_ix
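A sketch of indexing tokens with the UNK fallback; the file name is hypothetical:

vocab, word_to_ix = get_word_to_ix('train.txt', max_size=50000)
unk_ix = word_to_ix[UNK]
# Unknown words fall back to the UNK index.
ids = [word_to_ix.get(w, unk_ix) for w in ['the', 'zyxwvut']]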
Example #5
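Builds tag-to-index and index-to-tag mappings from the tags observed in the data.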
def get_tag_to_ix(input_file):
    """
    Creates a dictionary that maps each tag observed in the data to a
    unique index, and the inverse mapping from index to tag.
    :returns: dict1, dict2
    dict1: maps tag to unique index
    dict2: maps each unique index to its tag
    """
    tag_to_ix = {}
    title, X, Y = load_data(input_file)
    for tag_list in Y:
        for tag in tag_list:
            if tag not in tag_to_ix:
                tag_to_ix[tag] = len(tag_to_ix)

    # Inverse mapping: index -> tag.
    ix_to_tag = {v: k for k, v in tag_to_ix.items()}

    return tag_to_ix, ix_to_tag
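The two returned dictionaries are inverses of each other, e.g.:

tag_to_ix, ix_to_tag = get_tag_to_ix('train.txt')  # hypothetical file name
assert all(ix_to_tag[ix] == tag for tag, ix in tag_to_ix.items())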
Example #6
File: ann.py Project: pedrovbj/SVHN-ML
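An excerpt from a training script: it derives result-file paths, sets up logging, silences TensorFlow's C++ logging, and loads and standardizes the data before fitting. The excerpt ends mid-setup.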
import os
from datetime import datetime
from pathlib import Path

# prefix, timestamp, MyLogger, flatten, and load_data are defined
# elsewhere in the project.
results_path = Path('{}_results'.format(prefix))
log_path = results_path / '{}_out_{}.txt'.format(prefix, timestamp)
model_path = results_path / '{}_model_{}.ckpt'.format(prefix, timestamp)
img_path = results_path / '{}_cross_entropy_{}.png'.format(prefix, timestamp)

# Init logger
print(log_path, end='\r\n')
logger = MyLogger(log_path)

# Silence TensorFlow's C++ logging below errors.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Load data
logger.debug('Loading data... ')
t0 = datetime.now()
Xtrain, Ytrain, Xtest, Ytest = load_data()
Xtrain = flatten(Xtrain)
Xtest = flatten(Xtest)
# Standardize pixel values to zero mean and unit variance.
Xtrain = (Xtrain - Xtrain.mean()) / Xtrain.std()
Xtest = (Xtest - Xtest.mean()) / Xtest.std()
dt = datetime.now() - t0
logger.debug('Done. [Elapsed {}]\r\n'.format(dt))

# Define and fit model
logger.debug('Model fitting...\r\n')
t0 = datetime.now()

# printing period for cost of test set and accuracy
print_period = 1

# Number of samples to take from test set each time it computes cost
Example #7
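Samples images from a trained generator, saves and displays them, then compares basic pixel statistics of generated versus real training data.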
import numpy as np
import matplotlib.pyplot as plt

# model, latent_dim, and preproc come from the surrounding script.
r, c = 5, 5
noise = np.random.normal(0, 1, (r * c, latent_dim))
gen_imgs = model.predict(noise)


def normal(x):
    # Min-max scale to the [0, 1] range.
    return (x - np.min(x)) / (np.max(x) - np.min(x))


np.save("gen_imgs.npy", gen_imgs)  # np.save returns None, so don't rebind gen_imgs

plt.close()
plt.imshow(gen_imgs[0, :, :, 0])
plt.show()

X_train = preproc.load_data()

# Compare pixel-value statistics of generated vs. real data.

gen_data = gen_imgs[:, :, :, 0].ravel()
# gen_data = normal(gen_data)  # optionally min-max scale before comparing

data = X_train.ravel()

print("average_gen: {}".format(np.average(gen_data)))
print("average_21: {}".format(np.average(data)))
print("max_gen: {}".format(max(gen_data)))
print("min_gen: {}".format(min(gen_data)))
Example #8
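A command-line text classifier over TF-IDF features; the excerpt ends just after the logistic-regression branch is selected.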
import argparse
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix)
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from sklearn.svm import SVC
from preproc import load_data, tokenize

parser = argparse.ArgumentParser()
parser.add_argument("--classifier", type=str, default='lr')

if __name__ == "__main__":

    train_df = load_data('train')
    valid_df = load_data('valid')

    x_train = train_df['text']
    y_train = train_df['stars']

    x_valid = valid_df['text']
    y_valid = valid_df['stars']

    tfidf = TfidfVectorizer(tokenizer=tokenize)
    tfidf.fit(x_train)

    args = parser.parse_args()
    if args.classifier == 'lr':
        print("using single logistic regression")
        clf = LogisticRegression()