def __init__(self):
    """
    Run the word mover model evaluation as part of construction.

    Parses the data set, instantiates the ``WMD`` model, converts the
    parsed sentence dict into a token dict and passes it, together with
    the pair dict, to ``WMD.evaluate_model``.  Nothing is stored on
    ``self`` and the return value of ``evaluate_model`` is discarded.

    NOTE(review): nothing is printed in this method itself; any accuracy
    report presumably comes from ``evaluate_model`` -- confirm.
    """
    # parse MSR data; the first element of the result is not used here
    _, sentences_by_id, pairs_by_id = parse()

    # word mover model -- takes a long time to load!
    model = WMD()

    # the ID -> string dict is converted to an ID -> token dict on the way in
    model.evaluate_model(
        wmd_utils.sent_dict_to_tok_dict(sentences_by_id),
        pairs_by_id,
    )
def __init__(self):
    '''
    Create the db connection and seed the collection from the local folder.

    Binds ``self.client``, ``self.db`` (the ``styria`` database) and
    ``self.documents`` (its ``documents`` collection).  If the collection
    holds no documents, every file parsed from ``../20news-18828`` is
    passed to ``self.insert``.

    NOTE(review): the corpus path is relative, so it resolves against the
    current working directory of the process -- confirm this is intended.
    '''
    # connect=False defers the actual connection to the first operation.
    self.client = MongoClient('db.infinity.buda.link', 27017, connect=False)
    self.db = self.client.styria
    self.documents = self.db.documents

    # Emptiness probe.  Collection.count() was deprecated in PyMongo 3.7 and
    # removed in 4.0; fetching only the _id of a single document works on
    # every driver version and does not count the whole collection.
    if self.documents.find_one({}, {'_id': 1}) is not None:
        return

    # Collection is empty: load the corpus from disk and store each file.
    files = parser.parse('../20news-18828', 'iso-8859-1')
    for _file_name, file_content in files.items():
        self.insert(file_content)
if __name__ == '__main__':
    # Default settings (the same values the manual printed below advertises).
    QUERY = 'test case'
    # Other algorithm names listed in the manual:
    # 'vector_space', 'binary_independence'.
    ALGORITHM = 'bag_of_words'
    RESULTS = 20
    DOCUMENT = ''

    # Corpus location.  Smaller alternatives that were left commented out:
    # 'test-small/subset' and '20news-18828/alt.atheism'.
    files_path = '20news-18828'

    # Load the local files.
    logger.info('Loading files...')
    files = parse(files_path, 'iso-8859-1')
    logger.info('Files from %s are loaded.' % files_path)

    # Context object that receives the loaded files.
    algorithm_box = AlgorithmBox()
    algorithm_box.files = files

    # Usage manual: a blank line followed by the option summary.
    print()
    print('\n'.join((
        '---- MANUAL ----------',
        '-q "query" DEFAULT: "test case"',
        '-a "algorithm" -> bag_of_words, vector_space, binary_independence',
        ' DEFAULT: bag_of_words',
        '-n "number of results" DEFAULT: 20',
        '-d "new document" DEFAULT: ""',
        '-e exit',
        '----------------------',
    )))