Esempio n. 1
0
def runPart(train_file, test_file, best_answer, language):
    """Parse both data files, run part B on them, then evaluate part 2.

    Args:
        train_file: forwarded to ``parse_data`` to obtain the training set.
        test_file: forwarded to ``parse_data`` to obtain the test set.
        best_answer: forwarded to ``B.run`` as its fourth argument.
        language: forwarded to ``B.run`` as its third argument.
    """
    parsed_train, parsed_test = parse_data(train_file), parse_data(test_file)

    # Disabled experiment sweep, retained for reference. It ran B once per
    # EXPERIMENTS entry with an explicit feature-option dict:
    #
    #    for k in range(len(EXPERIMENTS)):
    #        print "Experiment {0}".format(k)
    #        ops = {"STOPWORDS":    EXPERIMENTS[k][0], \
    #               "PUNCTUATION":  EXPERIMENTS[k][1], \
    #               "BAGOFWORDS":   EXPERIMENTS[k][2], \
    #               "COLLOCATION":  EXPERIMENTS[k][3], \
    #               "PARTOFSPEECH": EXPERIMENTS[k][4]}
    #
    #        B.run(train_set, test_set, language, best_answer, ops)
    #        evaluate_part(2)
    B.run(parsed_train, parsed_test, language, best_answer)
    evaluate_part(2)
Esempio n. 2
0
def import_data(data_file):
    """Insert every row of *data_file* into the database as a QuestionModel.

    The file is opened and handed to ``parse_data``; each row it yields is
    converted with ``json2db``, added to ``db.session`` and committed on its
    own. After the file has been closed, the number of committed rows is
    printed.
    """
    committed = 0
    with open(data_file) as source:
        # enumerate(..., 1) leaves `committed` equal to the number of rows
        # processed; it stays 0 when the parser yields nothing.
        for committed, row in enumerate(parse_data(source), 1):
            record = QuestionModel(**json2db(row))
            db.session.add(record)
            db.session.commit()

    print('{} committed'.format(committed))
Esempio n. 3
0
 def test_parse_marker(self):
     """Each key of the marker fixture must appear unchanged in the parsed object's __dict__."""
     fixture = self.marker_dummy
     parsed = parse_data(Marker, fixture)
     for field in fixture:
         self.assertEqual(parsed.__dict__[field], fixture[field])
Esempio n. 4
0
 def test_bad_data(self):
     """parse_data must return None for the bad marker fixture."""
     parsed = parse_data(Marker, self.bad_marker_dummy)
     self.assertIsNone(parsed)
Esempio n. 5
0
 def test_bad_data_format_marker_class(self):
     """parse_data must return None when Marker is given the bad marker fixture."""
     # assertIsNone checks identity with None (not ``==``) and reports a
     # clearer failure message than assertEqual(..., None).
     self.assertIsNone(parse_data(Marker, self.bad_marker_dummy))
Esempio n. 6
0
 def test_class_without_parse_method(self):
     """parse_data must return None when called with self.NoParsedObject as the class."""
     # assertIsNone checks identity with None (not ``==``) and reports a
     # clearer failure message than assertEqual(..., None).
     self.assertIsNone(parse_data(self.NoParsedObject, self.marker_dummy))
Esempio n. 7
0
def test_gp_mut(generations=20):
    """Run a GP configured with 'branch_replacement' mutation on the small data file.

    Args:
        generations: value passed to ``GP.run`` (default 20).
    """
    dataset_file = Path('./containerfs/tmp/cetdl1772small.dat')
    dataset = parse_data(dataset_file)

    engine = GP(POP_SIZE, dataset, mutation_method='branch_replacement')
    engine.run(generations)
Esempio n. 8
0
def runPart(train_file, test_file, knn_answer, svm_answer, language):
    """Parse both data files and run part A on them.

    Args:
        train_file: forwarded to ``parse_data`` to obtain the training set.
        test_file: forwarded to ``parse_data`` to obtain the test set.
        knn_answer: forwarded to ``A.run`` as its fourth argument.
        svm_answer: forwarded to ``A.run`` as its fifth argument.
        language: forwarded to ``A.run`` as its third argument.
    """
    parsed_train, parsed_test = parse_data(train_file), parse_data(test_file)

    A.run(parsed_train, parsed_test, language, knn_answer, svm_answer)
Esempio n. 9
0
 def test_parse_marker(self):
     """Every key/value pair of the marker fixture must be readable as an attribute of the parsed object."""
     marker = parse_data(Marker, self.marker_dummy)
     # items() is available on Python 2 and 3; iteritems() exists only on
     # Python 2 and raises AttributeError on Python 3.
     for key, value in self.marker_dummy.items():
         self.assertEqual(getattr(marker, key), value)
Esempio n. 10
0
import A, B
from main import parse_data

# Load the train/dev split of each language. Order is unchanged: English,
# Catalan, Spanish, with the train file parsed before the dev file.
eng_train, eng_test = (parse_data('data/English-train.xml'),
                       parse_data('data/English-dev.xml'))

cata_train, cata_test = (parse_data('data/Catalan-train.xml'),
                         parse_data('data/Catalan-dev.xml'))

span_train, span_test = (parse_data('data/Spanish-train.xml'),
                         parse_data('data/Spanish-dev.xml'))

# A.run(train, test, language, knn_file, svm_file)
for a_args in (
        (eng_train, eng_test, 'English', 'KNN-English.answer',
         'SVM-English.answer'),
        (cata_train, cata_test, 'Catalan', 'KNN-Catalan.answer',
         'SVM-Catalan.answer'),
        (span_train, span_test, 'Spanish', 'KNN-Spanish.answer',
         'SVM-Spanish.answer'),
):
    A.run(*a_args)

# B.run(train, test, language, answer)
for b_args in (
        (eng_train, eng_test, 'English', 'Best-English.answer'),
        (cata_train, cata_test, 'Catalan', 'Best-Catalan.answer'),
        (span_train, span_test, 'Spanish', 'Best-Spanish.answer'),
):
    B.run(*b_args)
Esempio n. 11
0
 def test_data_null(self):
     """parse_data must return None when the data argument is None."""
     parsed = parse_data(Marker, None)
     self.assertIsNone(parsed)
Esempio n. 12
0
 def test_data_null(self):
     """Passing None as the data to parse_data must produce None."""
     outcome = parse_data(Marker, None)
     self.assertIsNone(outcome)
Esempio n. 13
0
 def test_parse_marker(self):
     """The parsed object's __dict__ must hold the fixture's value under every fixture key."""
     expected = self.marker_dummy
     parsed = parse_data(Marker, expected)
     for name in expected:
         self.assertEqual(parsed.__dict__[name], expected[name])
Esempio n. 14
0
 def test_class_without_parse_method(self):
     """parse_data must return None when called with self.NoParsedObject as the class."""
     # assertIsNone checks identity with None (not ``==``) and reports a
     # clearer failure message than assertEqual(..., None).
     self.assertIsNone(parse_data(self.NoParsedObject, self.marker_dummy))
Esempio n. 15
0
 def test_bad_data_format_marker_class(self):
     """parse_data must return None when Marker is given the bad marker fixture."""
     # assertIsNone checks identity with None (not ``==``) and reports a
     # clearer failure message than assertEqual(..., None).
     self.assertIsNone(parse_data(Marker, self.bad_marker_dummy))
Esempio n. 16
0
 def test_data_null(self):
     """parse_data must return None when the data argument is None."""
     # assertIsNone checks identity with None (not ``==``) and reports a
     # clearer failure message than assertEqual(..., None).
     self.assertIsNone(parse_data(Marker, None))
Esempio n. 17
0
 def test_class_null(self):
     """parse_data must return None when the class argument is None."""
     # assertIsNone checks identity with None (not ``==``) and reports a
     # clearer failure message than assertEqual(..., None).
     self.assertIsNone(parse_data(None, self.marker_dummy))
Esempio n. 18
0
 def test_bad_data(self):
     """The bad marker fixture must make parse_data return None."""
     outcome = parse_data(Marker, self.bad_marker_dummy)
     self.assertIsNone(outcome)
Esempio n. 19
0
 def test_parse_marker(self):
     """Every key/value pair of the marker fixture must be readable as an attribute of the parsed object."""
     marker = parse_data(Marker, self.marker_dummy)
     # items() is available on Python 2 and 3; iteritems() exists only on
     # Python 2 and raises AttributeError on Python 3.
     for key, value in self.marker_dummy.items():
         self.assertEqual(getattr(marker, key), value)
Esempio n. 20
0
    return sense


def most_frequent_sense(data, sense_dict, language):
    """Write one baseline answer line per instance to ``<language>.baseline``.

    Lexelts are visited in order of ``main.replace_accented`` applied to the
    part of the lexelt before its first '.'; the instances of a lexelt are
    visited in order of the integer after the last '.' of their id. Each
    output line is ``lexelt instance_id sense`` passed through
    ``main.replace_accented``, written UTF-8 encoded.

    Args:
        data: mapping of lexelt -> iterable of instances; element 0 of each
            instance is its id string.
        sense_dict: handed to ``getFrequentSense`` together with the lexelt.
        language: prefix of the output file name.
    """
    def lexelt_key(item):
        return main.replace_accented(item[0].split('.')[0])

    def instance_key(instance):
        return int(instance[0].split('.')[-1])

    # The context manager closes the file even if a lookup or write raises;
    # the previous explicit close() was skipped on any exception.
    with codecs.open(language + '.baseline', encoding='utf-8',
                     mode='w') as outfile:
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for lexelt, instances in sorted(data.items(), key=lexelt_key):
            # The lookup takes only lexelt and sense_dict, so it is done once
            # per lexelt instead of once per instance.
            sid = getFrequentSense(lexelt, sense_dict)
            for instance in sorted(instances, key=instance_key):
                instance_id = instance[0]
                outfile.write(
                    main.replace_accented(lexelt + ' ' + instance_id + ' ' +
                                          sid + '\n'))


if __name__ == '__main__':
    # Script entry point: takes exactly one argument, the language name, and
    # writes the most-frequent-sense baseline for that language's dev file.
    data_path = '/home/595/Homework3/data/'
    if len(sys.argv) != 2:
        # print(...) with a single argument prints the same text on Python 2
        # and Python 3; the bare print statement is a SyntaxError on Python 3.
        print('Usage: python baseline.py <language>')
        # NOTE(review): exits with status 0 even on a usage error; kept as-is
        # so existing callers see the same exit code.
        sys.exit(0)
    language = sys.argv[1]
    train_file = data_path + language + '-train.xml'
    dev_file = data_path + language + '-dev.xml'
    train = main.parse_data(train_file)
    test = main.parse_data(dev_file)
    sense_dict = build_dict(train)
    most_frequent_sense(test, sense_dict, language)
Esempio n. 21
0
import couchdb
from main import parse_data

# Handle to the 'portao' database on the CouchDB server (the credentials in
# the URL are masked in this copy of the script).
port = couchdb.client.Server('https://*****:*****@fiatjaf.iriscouch.com')['portao']

# For every row of the 'webapp/only-raw' view: re-parse the row value and
# store the result back under the same document id.
for row in port.view('webapp/only-raw', include_docs=True):
    # print(...) with a single argument prints the same text on Python 2 and
    # Python 3; the bare print statement is a SyntaxError on Python 3.
    print(row)
    print(row.doc)
    data = parse_data(row.value)
    # Carry over the stored document's revision onto the new body.
    # NOTE(review): presumably required so the write is accepted as an update
    # of the existing document rather than a conflict - confirm.
    data['_rev'] = row.doc.rev
    print(data)
    port[row.id] = data
Esempio n. 22
0
    Return the most frequent sense of a word (lexelt) in the training set
    '''
    sense = ''
    try:
        sense = sense_dict[lexelt]
    except KeyError:
        pass
    return sense

def most_frequent_sense(data, sense_dict, language):
    '''
    Write one baseline answer line per instance to <language>.baseline.

    Lexelts are visited in order of main.replace_accented applied to the part
    of the lexelt before its first '.'; the instances of a lexelt are visited
    in order of the integer after the last '.' of their id. Each output line
    is "lexelt instance_id sense" passed through main.replace_accented,
    written UTF-8 encoded.

    data:       mapping of lexelt -> iterable of instances; element 0 of each
                instance is its id string
    sense_dict: handed to getFrequentSense together with the lexelt
    language:   prefix of the output file name
    '''
    def lexelt_key(item):
        return main.replace_accented(item[0].split('.')[0])

    def instance_key(instance):
        return int(instance[0].split('.')[-1])

    # The context manager closes the file even if a lookup or write raises;
    # the previous explicit close() was skipped on any exception.
    with codecs.open(language + '.baseline', encoding='utf-8', mode='w') as outfile:
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for lexelt, instances in sorted(data.items(), key=lexelt_key):
            # The lookup takes only lexelt and sense_dict, so it is done once
            # per lexelt instead of once per instance.
            sid = getFrequentSense(lexelt, sense_dict)
            for instance in sorted(instances, key=instance_key):
                instance_id = instance[0]
                outfile.write(main.replace_accented(lexelt + ' ' + instance_id + ' ' + sid + '\n'))

if __name__ == '__main__':
    # Script entry point: takes exactly one argument, the language name, and
    # writes the most-frequent-sense baseline for that language's dev file.
    data_path = '/home/595/Homework3/data/'
    if len(sys.argv) != 2:
        # print(...) with a single argument prints the same text on Python 2
        # and Python 3; the bare print statement is a SyntaxError on Python 3.
        print('Usage: python baseline.py <language>')
        # NOTE(review): exits with status 0 even on a usage error; kept as-is
        # so existing callers see the same exit code.
        sys.exit(0)
    language = sys.argv[1]
    train_file = data_path + language + '-train.xml'
    dev_file = data_path + language + '-dev.xml'
    train = main.parse_data(train_file)
    test = main.parse_data(dev_file)
    sense_dict = build_dict(train)
    most_frequent_sense(test, sense_dict, language)
Esempio n. 23
0
 def test_class_null(self):
     """parse_data must return None when the class argument is None."""
     # assertIsNone checks identity with None (not ``==``) and reports a
     # clearer failure message than assertEqual(..., None).
     self.assertIsNone(parse_data(None, self.marker_dummy))
Esempio n. 24
0
import main
from chromosome import Chromosome
# NOTE(review): parse_data is called with no arguments and its return value is
# discarded; presumably it populates state inside `main` that the fitness
# procedure reads later - confirm before reordering these statements.
main.parse_data()
# Take the fitness procedure from `main`; it is handed to the Chromosome below.
fitness = main.fitness_function_procedure
# Build a chromosome from a list of four strings plus the fitness procedure.
# NOTE(review): the strings look like encoded gene values - confirm the
# expected format against Chromosome.
chromosome = Chromosome(['1', '101', '10', '10'], fitness)
# The result of calculate_value() is not captured here.
chromosome.calculate_value()
Esempio n. 25
0
 def test_data_null(self):
     """parse_data must return None when the data argument is None."""
     # assertIsNone checks identity with None (not ``==``) and reports a
     # clearer failure message than assertEqual(..., None).
     self.assertIsNone(parse_data(Marker, None))