Example #1
0
def pyfasttext_sample():
    """Load a saved pyfasttext model and print its predictions for a file.

    Package reference: https://pypi.org/project/pyfasttext/
    """
    # An alternative model file, 'output/model_cooking_6.bin', was used here
    # previously; the '.ftz' model below is the one currently loaded.
    model_path = 'output/model_cooking_5.ftz'
    validation_path = 'data/cooking/pre_cooking.valid'

    classifier = FastText()
    classifier.load_model(model_path)

    # The second positional argument is presumably `k`, the number of labels
    # requested per line -- confirm against the pyfasttext API.
    predictions = classifier.predict_file(validation_path, 2)
    for line_index, predicted in enumerate(predictions):
        print(line_index, predicted)
Example #2
0
def runExperiment(trainFileName, testFileName):
    """Train a fastText classifier and count its correct test predictions.

    A supervised model is trained on ``trainFileName`` with the module-level
    settings ``DIM`` and ``MINCOUNT`` and ``TMPFILENAME`` as output name.
    The model then labels ``testFileName``; for every line the first
    predicted label is compared with the gold class returned by
    ``readData`` after its ``__label__`` prefix has been removed.

    Args:
        trainFileName: path of the training file handed to fastText.
        testFileName: path of the file to predict and to read gold
            classes from.

    Returns:
        A dict with ``"correct"`` (number of lines whose first predicted
        label equals the gold class) and ``"labels"`` (the raw result of
        ``predict_file``).
    """
    model = FastText()
    try:
        model.supervised(input=trainFileName,
                         output=TMPFILENAME,
                         dim=DIM,
                         minCount=MINCOUNT,
                         verbose=0)
        labels = model.predict_file(testFileName)
        data = readData(testFileName)
        correct = 0
        for i, predicted in enumerate(labels):
            gold = re.sub("__label__", "", data["classes"][i])
            data["classes"][i] = gold
            # Guard against an empty prediction list: count it as a miss
            # instead of failing with IndexError on predicted[0].
            if predicted and predicted[0] == gold:
                correct += 1
    finally:
        # Always remove the temporary model files, including when training,
        # prediction or reading the test data raised; only delete what
        # actually exists so cleanup never masks the original error.
        for suffix in (".bin", ".vec"):
            tmpPath = TMPFILENAME + suffix
            if os.path.exists(tmpPath):
                os.unlink(tmpPath)
    return {"correct": correct, "labels": labels}
    def train(self, params, test_path):
        """Train a fastText model with ``params`` and return ``1 - metric``.

        ``params`` is converted to fastText keyword arguments through
        ``self.generate_fastText_hyperparams``; a supervised model is trained
        from ``self.TRAIN_PATH`` into ``self.MODEL_PATH`` and the first
        predicted label of every line of ``test_path`` is scored with
        ``self.metrics_fun``.

        Returns:
            ``1.0 - self.metrics_fun(predictions)`` on success, or ``1.0``
            when any step raises (the error is reported via
            ``self.message``).
        """
        self.message('parameters:', params)
        try:
            fasttext_args = self.generate_fastText_hyperparams(params)
            self.message('fastText parameters:')
            for arg_name, arg_value in fasttext_args.items():
                self.message(' ', arg_name, ':', arg_value)

            classifier = FastText()
            classifier.supervised(
                input=self.TRAIN_PATH,
                output=self.MODEL_PATH,
                **fasttext_args
            )

            # Keep only the top label predicted for each test line.
            top_labels = []
            for line_labels in classifier.predict_file(test_path):
                top_labels.append(line_labels[0])

            score = self.metrics_fun(top_labels)
            self.message('metrics:', score)
            loss = 1.0 - score
            return loss
        except Exception as error:
            # Any failure is reported and mapped to the value 1.0.
            self.message('failed to train!')
            self.message(error)
            return 1.0
  with open(TRAIN_FOLDER+str(k),"w") as train_file:
    for train in train_data:
      train_file.write(train+"\n")

# For every file matching TRAIN_FOLDER + "*": train a fastText model on it,
# predict the test file that has the same base name under TEST_FOLDER, and
# collect the true labels of that test file for a confusion matrix.
for train in glob.glob(TRAIN_FOLDER+"*"):
  print("Processing of "+basename(train))  
  print("Processing of the model")
  
  # Create and train the fastText model; its output name is MODEL_PATH plus
  # the base name of the training file.
  model=FastText()
  classifier = model.supervised(input=train,output=MODEL_PATH+basename(train),lr=0.02,epoch=50)
  print("Testing the model")
  
  # Predict the matching test file, requesting k=1 label per line.
  print(TEST_FOLDER+basename(train))
  result=model.predict_file(TEST_FOLDER+basename(train), k=1)
  
  # Open the test file to build the confusion matrix.
  with open(TEST_FOLDER+basename(train)) as f:
    test_dataset=f.read().splitlines()

    pred_labels_list=[]  # list to save the labels predicted by each model
    test_labels=[]  # list to save the real labels
    test_labels_normalized=[]  # list to save the real labels in the form label1\nlabel2\nlabel3

    # Re-open the test file to collect the real labels: the first
    # whitespace-separated token of each row.
    # NOTE(review): this inner `with` rebinds `f` and opens the same file a
    # second time while the outer handle is still open.
    pred_labels=[]  # list of lists of labels estimated by the model
    with open(TEST_FOLDER+basename(train)) as f:
      for row in f:
        test_labels.append(row.split()[0])  
    # Build the list of real labels for the confusion matrix.