def main():
    """Classify custom SMS data with a trained MNB classifier and write results.

    Pipeline: preprocess the raw SMS messages into bags-of-words, convert them
    to feature vectors, predict a label per message, and write one
    ``msg -> label`` line per message to ``output/results.txt``.
    """
    pp = Preprocessor()
    print('processing custom data, computing bows...')
    tdpath = 'dataset/test/sms-data'
    pp.process_custom_data(tdpath)

    fm = FeatureModel()
    print('converting custom data to fvs...')
    fm.compute_custom_fv_matrix('custom')

    tdpath = 'bin_data/custom_fv.npy'
    cpath = 'bin_data/mnb-classifier.npy'
    # The .npy file stores a pickled dict (hence .item()); numpy >= 1.16.4
    # refuses to unpickle unless allow_pickle=True is passed explicitly.
    data = np.load('bin_data/custom-data.npy', allow_pickle=True).item()

    tester = Tester(tdpath, cpath)
    print('predicting labels for custom data...')
    results = tester.predict_labels_for_custom_data(data)

    # The with-block closes the file on exit; no explicit close() is needed.
    with open('output/results.txt', 'w') as textfile:
        for msg, label in results.items():
            textfile.write('%s -> %s\n' % (msg, label))
    print('Results written to results.txt')
def main():
    """Run feature extraction over the dataset via a fresh FeatureModel."""
    model = FeatureModel()
    model.extract_features()
def main():
    """Build the feature-vector matrix for the 'testing' split."""
    feature_model = FeatureModel()
    feature_model.compute_fv_matrix('testing')
from FeatureModel import FeatureModel

# --- Demo 1: generated tree rooted at 'Root' --------------------------------
# Loop variables renamed from `id` to `i`: `id` shadows the builtin id().
m = FeatureModel('Root')
m.add_feature(parent='Root',
              children=['NewNode-1%d' % (i + 1) for i in range(3)],
              max=4, min=2)
m.add_feature(parent='Root',
              children=['NewNode-2%d' % (i + 1) for i in range(7)],
              max=4, min=2)
m.add_feature(parent='Root',
              children=['NewNode-3%d' % (i + 1) for i in range(4)],
              max=4, min=4)
m.add_feature(parent='NewNode-23',
              children=['NewNode-23%d' % (i + 1) for i in range(5)],
              max=1, min=1)
# m.pretty_print()
m.instantiate(debug=True)

input('Waiting...')
m.clear()

# --- Demo 2: small hand-written model rooted at 'S' -------------------------
m = FeatureModel('S')
m.add_feature(parent='S', children=['D', 'E'], min=1, max=2)
m.add_feature(parent='S', children=['A', 'B', 'C'], min=2, max=2)
m.add_feature(parent='B',
              children=['BA', 'BB', 'BC', 'BD', 'BE', 'BF'],
              min=2, max=5)
m.instantiate(debug=True)
# Both iterators share the same settings; build them from one kwargs dict.
iterator_kwargs = dict(batch_size=config['batch_size'],
                       device=device,
                       sort_within_batch=False)
train_iterator = torchdata.BucketIterator(train_dataset, **iterator_kwargs)
test_iterator = torchdata.BucketIterator(test_dataset, **iterator_kwargs)

# Derive n-gram features from the training examples, then build vocabularies.
FEATURES.get_ngram_features([ex.features for ex in train_dataset.examples])
LABEL.build_vocab(train_dataset)
FEATURES.build_vocab(train_dataset)

# Per-class weights computed from the training label distribution.
num_classes, weights = utils.get_weights(
    [ex.label for ex in train_dataset.examples], config)

featureModel = FeatureModel(FEATURES.get_features_count(),
                            num_classes,
                            config['dropout']).to(device)

# Optionally resume the model from a saved checkpoint.
if config['load_checkpoint']:
    state = torch.load(config['checkpoint'], map_location=device)
    featureModel.load_state_dict(state)

print(f'Model has {utils.count_parameters(featureModel)} trainable parameters')

# weights vectors analysis
# weights_analysis(featureModel)

optimiser = torch.optim.Adam(featureModel.parameters(),
                             lr=config['learning_rate'],
                             weight_decay=config['weight_decay'])