Example #1
0
 def __init__(self):
     """
     Run the word mover's distance (WMD) evaluation on construction.

     Parses the MSR data, builds a WMD model, converts the
     ID -> sentence string mapping into an ID -> token mapping and hands
     that mapping plus the pair dictionary to ``evaluate_model``.

     NOTE(review): an earlier note said the model accuracy is printed once
     evaluation returns; nothing is printed here, so any reporting
     presumably happens inside ``evaluate_model`` -- confirm.
     """
     # parse MSR data; the first returned item is not needed in this method
     _, id_to_sentence, pair_dict = parse()
     # build the word mover model first -- loading it takes a long time
     word_mover = WMD()
     # convert the ID -> string dict into an ID -> token dict
     candidate_dict = wmd_utils.sent_dict_to_tok_dict(id_to_sentence)
     word_mover.evaluate_model(candidate_dict, pair_dict)
Example #2
0
 def __init__(self):
     '''
     Open the database connection and, when the ``documents``
     collection holds no entries, fill it from the local
     20news-18828 folder.
     '''
     self.client = MongoClient('db.infinity.buda.link', 27017, connect=False)
     self.db = self.client.styria
     self.documents = self.db.documents

     # NOTE(review): if this is a PyMongo collection, count() is deprecated
     # in newer releases in favour of count_documents({}) -- confirm the
     # installed version before switching.
     if self.documents.count() > 0:
         # data is already present; nothing to load
         return

     parsed_files = parser.parse('../20news-18828', 'iso-8859-1')
     # only the file contents are stored; the file names are ignored
     for _, content in parsed_files.items():
         self.insert(content)
Example #3
0
 def __init__(self):
     '''
     Set up the database handles (client, ``styria`` database,
     ``documents`` collection) and seed the collection from the
     local 20news-18828 folder if it is empty.
     '''
     self.client = MongoClient(
         'db.infinity.buda.link',
         27017,
         connect=False,
     )
     self.db = self.client.styria
     self.documents = self.db.documents

     # NOTE(review): count() is deprecated on PyMongo collections in newer
     # releases (count_documents({}) is the replacement) -- verify against
     # the pinned driver version.
     already_stored = self.documents.count()
     if already_stored <= 0:
         # empty collection: parse the corpus and insert every file body
         corpus = parser.parse('../20news-18828', 'iso-8859-1')
         for _name, text in corpus.items():
             self.insert(text)
Example #4
0
if __name__ == '__main__':

    # Default settings. ALGORITHM may alternatively be set to
    # 'vector_space' or 'binary_independence'.
    QUERY, RESULTS, DOCUMENT = 'test case', 20, ''
    ALGORITHM = 'bag_of_words'

    # Load the local corpus. Smaller alternatives used during development:
    # 'test-small/subset' and '20news-18828/alt.atheism'.
    files_path = '20news-18828'
    logger.info('Loading files...')
    files = parse(files_path, 'iso-8859-1')
    logger.info('Files from %s are loaded.' % files_path)

    # The algorithm box is the context object; hand it the parsed files.
    algorithm_box = AlgorithmBox()
    algorithm_box.files = files

    # Show the usage manual: a blank line followed by one entry per line.
    print()
    print(
        '---- MANUAL ----------',
        '-q "query" DEFAULT: "test case"',
        '-a "algorithm" -> bag_of_words, vector_space, binary_independence',
        '                  DEFAULT: bag_of_words',
        '-n "number of results" DEFAULT: 20',
        '-d "new document" DEFAULT: ""',
        '-e exit',
        '----------------------',
        sep='\n',
    )
Example #5
0
if __name__ == '__main__':

    # ---- defaults ----
    # Other accepted ALGORITHM values: 'vector_space', 'binary_independence'.
    ALGORITHM = 'bag_of_words'
    QUERY = 'test case'
    DOCUMENT = ''
    RESULTS = 20

    # ---- load local files ----
    # For quicker runs the path can point at 'test-small/subset' or
    # '20news-18828/alt.atheism' instead of the full corpus.
    files_path = '20news-18828'
    logger.info('Loading files...')
    files = parse(files_path, 'iso-8859-1')
    logger.info('Files from %s are loaded.' % files_path)

    # ---- create algorithm box object (context object) ----
    algorithm_box = AlgorithmBox()
    algorithm_box.files = files

    # ---- print the manual ----
    # The leading empty string reproduces the blank line before the banner.
    print(
        '',
        '---- MANUAL ----------',
        '-q "query" DEFAULT: "test case"',
        '-a "algorithm" -> bag_of_words, vector_space, binary_independence',
        '                  DEFAULT: bag_of_words',
        '-n "number of results" DEFAULT: 20',
        '-d "new document" DEFAULT: ""',
        '-e exit',
        '----------------------',
        sep='\n',
    )