def build_and_eval():
    utility.make_dir(OUTPUT_DIR)

    print('Building lexicon')
    poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZIONARIO_SOSTANTIVI_AUGMENTED_PAISA_FILE))
    agg_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZIONARIO_AGGETTIVI_AUGMENTED_PAISA_FILE))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    lex_solution_set = set(sost_lexicon + agg_lexicon)
    
    lexicon.printLexiconToFile(lex_set, LEX_FREQ_FILE)
    lexicon.printLexiconToFile(lex_solution_set, SOLUTION_LEX_FREQ_FILE)

    print('Computing coverage')
    scorer.computeCoverageOfGameWordLex(lex_set, lex_solution_set, corpora.GAME_SET_100_FILE, COVERAGE_WORD_GAME100_FILE)

    print('Building association matrix')    
    matrix = matrix_dict.Matrix_Dict(lex_set, lex_solution_set)
    matrix.add_patterns_from_corpus(corpora.PAISA_RAW_INFO)
    matrix.compute_association_scores()
    matrix.write_matrix_to_file(MATRIX_FILE)

    print('Eval')
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.GAME_SET_100_FILE, EVAL_WORD_GAME100_FILE)
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.NLP4FUN_DEV_TSV_v2_tv_FILE, EVAL_NLP4FUN_DEV_TV_FILE)
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.NLP4FUN_DEV_TSV_v2_bg_FILE, EVAL_NLP4FUN_DEV_BG_FILE)
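
The scorer.evaluate_kbest_MeanReciprocalRank function itself is not shown in these excerpts. For reference, mean reciprocal rank averages 1/rank of the first correct solution over all games; a minimal, self-contained sketch (function and variable names here are illustrative, not the repo's API):

def mean_reciprocal_rank(ranked_guesses, solutions):
    # MRR = (1/|Q|) * sum over queries of 1 / rank of the first correct
    # guess, contributing 0 when the solution never appears in the list.
    total = 0.0
    for guesses, solution in zip(ranked_guesses, solutions):
        for rank, guess in enumerate(guesses, start=1):
            if guess == solution:
                total += 1.0 / rank
                break
    return total / len(solutions)

# Solutions found at ranks 1 and 4 -> (1/1 + 1/4) / 2 = 0.625
print(mean_reciprocal_rank([['casa', 'sole'], ['a', 'b', 'c', 'mare']],
                           ['casa', 'mare']))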
Example n. 2
def build_and_eval():
    utility.make_dir(OUTPUT_DIR)
    print('\nBuilding lexicon')

    lex_set = lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE)
    lex_solution_set = lex_set
    '''
    poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(corpora.getSostantiviSetFromPaisa(min_freq=100, inflected=True))
    print('\nSize of sostantivi lex: {}'.format(len(sost_lexicon)))
    agg_lexicon = list(corpora.getAggettiviSetFromPaisa(min_freq=100, inflected=True))
    print('\nSize of agg lex: {}'.format(len(agg_lexicon)))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    lex_solution_set = set(sost_lexicon + agg_lexicon)
    #lex_solution_set = lex_set
    '''

    print('\nComputing lex coverage')
    scorer.computeCoverageOfGameWordLex(lex_set, lex_solution_set,
                                        corpora.GAME_SET_100_FILE,
                                        COVERAGE_WORD_GAME100_FILE)

    print('\nBuilding association matrix')
    matrix = Matrix(lex_set, lex_solution_set)
    matrix.add_patterns_from_corpus(corpora.DE_MAURO_POLIREMATICHE_INFO)
    corpora.addBigramFromPolirematicheInMatrix(matrix, weight=1)
    #corpora.addBigramFromCompunds(matrix, lex_set, min_len=4, weight=10)
    matrix.compute_association_scores()
    matrix.write_matrix_to_file(MATRIX_FILE)

    print('\nEval')
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.GAME_SET_100_FILE,
                                             EVAL_WORD_GAME100_FILE)
def __init__(self,
             model,
             optimizer,
             gpu=-1,
             save_path='./',
             load_model=None,
             train_transform=None,
             test_transform=None,
             train_batch_size=64,
             test_batch_size=256,
             start_epoch=1,
             epochs=200,
             seed=1):
    self.model, self.optimizer = model, optimizer
    self.gpu = gpu
    self.save_path = utility.remove_slash(save_path)
    self.train_transform, self.test_transform = train_transform, test_transform
    self.train_batch_size, self.test_batch_size = train_batch_size, test_batch_size
    self.start_epoch, self.epochs, self.seed = start_epoch, epochs, seed
    # load the dataset (MNIST)
    self.init_dataset()
    # initialize the random seed
    self.init_seed()
    # create the save directory
    utility.make_dir(save_path)
    # load a pretrained model if one was given
    self.load(load_model)
    # initialize logging
    self.init_log()
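
The snippet above only shows the constructor; a minimal usage sketch follows, assuming the surrounding class is named Trainer and using a toy MNIST-sized model (both are assumptions, since neither appears in the excerpt):

import torch.nn as nn
import torch.optim as optim

# Hypothetical usage: Trainer is the (unshown) class that owns the
# __init__ above; the model here is a stand-in for illustration.
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
optimizer = optim.SGD(model.parameters(), lr=0.01)
trainer = Trainer(model, optimizer, gpu=0, save_path='./checkpoints', epochs=10)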
Example n. 4
def saveParameters(fileDir):
    # Model name '1' corresponds to dataset folder 1.
    model_name = '1'
    detection_model = objectDetector.load_model(model_name)
    # fileDir is a directory; gather the files inside it.
    files = utility.get_filenames(fileDir)
    fileNames = []
    domColors = []
    wallColors = []
    floorColors = []

    for idx, f in enumerate(files):
        if "." not in f:
            continue
        print("Now processing ", f, " [ ", idx, " ]")

        coord, str_tag, number_tag, score = objectDetector.inference(
            detection_model, f)

        # Build the save file name.
        save_file_name = utility.add_name(f, "_od", extension="bin")
        dirs = save_file_name.split("/")

        save_image_name = ""
        for d in dirs[0:-1]:
            save_image_name += d + "/"
        save_image_name += f.split("/")[-1].split(".")[0] + "/"

        utility.make_dir(save_image_name)

        rect_files = []
        additional_infor = []

        for i in range(len(str_tag)):
            additional_infor.append(-1)
            rect_image = image_processing.get_rect_image(
                f, int(coord[i][0]), int(coord[i][1]), int(coord[i][2]),
                int(coord[i][3]))
            rect_image_name = save_image_name + f.split("/")[-1]
            rect_image_name = utility.add_name(rect_image_name, "_" + str(i))
            rect_files.append(rect_image_name)
            utility.save_image(rect_image, rect_image_name)

        dom_color = image_processing.get_dominant_color(f)
        n_color = utility.get_remarkable_color_n(dom_color, MAX_COLOR_LENGTH)
        fileNames.append(os.path.basename(f))
        domColors.append(n_color)
        wallColors.append([])
        floorColors.append([])
        utility.save_result([
            coord, str_tag, number_tag, score, rect_files, additional_infor,
            n_color
        ], save_file_name)

    utility.save_result([files, domColors, wallColors, floorColors],
                        config.RESEARCH_BASE_FILE)
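
The implementations of image_processing.get_dominant_color and utility.get_remarkable_color_n are not shown here. A common approach is to quantize pixel colors and keep the most frequent ones; a minimal sketch under that assumption (PIL/NumPy, names illustrative):

import numpy as np
from PIL import Image

def dominant_colors(path, k=8):
    # Quantize each RGB channel to 8 levels, then return the k most
    # frequent quantized colors as (r, g, b) tuples.
    pixels = np.asarray(Image.open(path).convert('RGB')).reshape(-1, 3)
    quantized = (pixels // 32) * 32
    colors, counts = np.unique(quantized, axis=0, return_counts=True)
    top = np.argsort(counts)[::-1][:k]
    return [tuple(int(v) for v in c) for c in colors[top]]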
def build_and_eval():
    utility.make_dir(OUTPUT_DIR)

    print('Building lexicon')

    poli_lexicon = list(
        lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(
        lexicon.loadLexiconFromFile(
            corpora.DIZIONARIO_SOSTANTIVI_AUGMENTED_PAISA_FILE))
    agg_lexicon = list(
        lexicon.loadLexiconFromFile(
            corpora.DIZIONARIO_AGGETTIVI_AUGMENTED_PAISA_FILE))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    lex_solution_set = set(sost_lexicon + agg_lexicon)
    '''
    poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(corpora.getSostantiviSetFromPaisa(min_freq=1000, inflected=True))
    print('\nSize of sostantivi lex: {}'.format(len(sost_lexicon)))
    agg_lexicon = list(corpora.getAggettiviSetFromPaisa(min_freq=1000, inflected=True))
    print('\nSize of agg lex: {}'.format(len(agg_lexicon)))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    '''

    lexicon.printLexiconToFile(lex_set, LEX_FREQ_FILE)
    lexicon.printLexiconToFile(lex_solution_set, SOLUTION_LEX_FREQ_FILE)

    print('Computing lex coverage')
    scorer.computeCoverageOfGameWordLex(lex_set, lex_solution_set,
                                        corpora.GAME_SET_100_FILE,
                                        COVERAGE_WORD_GAME100_FILE)

    print('Building association matrix')
    matrix = Matrix_Dict(lex_set, lex_solution_set)
    matrix.add_patterns_from_corpus(corpora.PAISA_RAW_INFO)
    matrix.add_patterns_from_corpus(corpora.DE_MAURO_POLIREMATICHE_INFO,
                                    weight=DE_MAURO_WEIGHT)
    matrix.add_patterns_from_corpus(corpora.PROVERBI_INFO,
                                    weight=PROVERBI_WEIGHT)
    matrix.add_patterns_from_corpus(corpora.ITWAC_RAW_INFO, weight=1)
    matrix.add_patterns_from_corpus(corpora.WIKI_IT_TITLES_INFO,
                                    weight=WIKI_IT_WEIGHT)
    #matrix.add_patterns_from_corpus(corpora.WIKI_IT_TEXT_INFO, weight=1)
    corpora.addBigramFromPolirematicheInMatrix(matrix, DE_MAURO_WEIGHT)
    corpora.addBigramFromCompunds(matrix,
                                  lex_set,
                                  min_len=4,
                                  weight=COMPOUNDS_WEIGHT)
    matrix.compute_association_scores()
    matrix.write_matrix_to_file(MATRIX_FILE)

    print('Eval')
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.GAME_SET_100_FILE,
                                             EVAL_WORD_GAME100_FILE)
Example n. 6
def saveParameter(fileName, detection_model):
    coord, str_tag, number_tag, score = objectDetector.inference(
        detection_model, fileName)

    # Build the save file name.
    save_file_name = config.RESEARCH_BASE_DIR + "/" + os.path.basename(
        utility.get_od_bin(fileName))
    dirs = save_file_name.split("/")

    save_image_name = ""
    for d in dirs[0:-1]:
        save_image_name += d + "/"
    save_image_name += fileName.split("/")[-1].split(".")[0] + "/"

    utility.make_dir(save_image_name)

    rect_files = []
    additional_infor = []

    for i in range(len(str_tag)):
        additional_infor.append(-1)
        rect_image = image_processing.get_rect_image(fileName,
                                                     int(coord[i][0]),
                                                     int(coord[i][1]),
                                                     int(coord[i][2]),
                                                     int(coord[i][3]))
        rect_image_name = save_image_name + fileName.split("/")[-1]
        rect_image_name = utility.add_name(rect_image_name, "_" + str(i))
        rect_files.append(rect_image_name)
        utility.save_image(rect_image, rect_image_name)

    dom_color = image_processing.get_dominant_color(fileName)
    n_color = utility.get_remarkable_color_n(dom_color, MAX_COLOR_LENGTH)
    utility.save_result([
        coord, str_tag, number_tag, score, rect_files, additional_infor,
        n_color
    ], save_file_name)
    return [
        coord, str_tag, number_tag, score, rect_files, additional_infor,
        n_color
    ]
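
The string-splitting loops that rebuild save_image_name throughout these snippets can be expressed more compactly with os.path; a behavior-equivalent sketch for the POSIX-style paths the code assumes (the helper name is hypothetical):

import os

def crop_dir(save_file_name, input_file):
    # Same result as the dirs[0:-1] join loop above: a subdirectory next
    # to save_file_name, named after input_file's basename minus extension.
    stem = os.path.splitext(os.path.basename(input_file))[0]
    return os.path.join(os.path.dirname(save_file_name), stem) + '/'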
def build():
    utility.make_dir(OUTPUT_DIR)

    print('Building lexicon')

    poli_lexicon = list(
        lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(
        lexicon.loadLexiconFromFile(
            corpora.DIZIONARIO_SOSTANTIVI_AUGMENTED_PAISA_FILE))
    agg_lexicon = list(
        lexicon.loadLexiconFromFile(
            corpora.DIZIONARIO_AGGETTIVI_AUGMENTED_PAISA_FILE))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    lex_solution_set = set(sost_lexicon + agg_lexicon)

    lexicon.printLexiconToFile(lex_set, LEX_FREQ_FILE)
    lexicon.printLexiconToFile(lex_solution_set, SOLUTION_LEX_FREQ_FILE)

    def add_patterns_from_corpus(corpus_info):
        lines_extractor = corpora.extract_lines(corpus_info)
        source = corpus_info['name']
        patterns_count = 0
        print("Adding patterns from source: {}".format(source))
        tot_lines = corpus_info['lines']
        for n, line in enumerate(lines_extractor, 1):
            patterns_count += patterns_extraction.addPatternsFromLineInMongo(
                line, lex_set, source)
            if n % 1000 == 0:
                sys.stdout.write("Progress: {0:.1f}%\r".format(
                    float(n) * 100 / tot_lines))
                sys.stdout.flush()
        print('Extracted patterns: {}'.format(patterns_count))

    # print('Computing lex coverage')
    # scorer.computeCoverageOfGameWordLex(lex_set, lex_solution_set, corpora.GAME_SET_100_FILE, COVERAGE_WORD_GAME100_FILE)

    print('Adding patterns in db')
    add_patterns_from_corpus(corpora.DE_MAURO_POLIREMATICHE_INFO)
Example n. 8
def build_and_eval():
    utility.make_dir(OUTPUT_DIR)
    print('Building lexicon')
    lex_set = lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE)
    lexicon_freq = {w: 1 for w in lex_set}
    #lex_solution_set =  lexicon.loadLexiconFromFile(corpora.DIZIONARIO_BASE_SOSTANTIVI_FILE)
    print('Lex size: {}'.format(len(lex_set)))
    lexicon.printLexFreqToFile(lexicon_freq, LEX_FREQ_FILE)
    print('Computing coverage')
    scorer.computeCoverageOfGameWordLex(lexicon_freq,
                                        corpora.GAME_SET_100_FILE,
                                        COVERAGE_WORD_GAME100_FILE)
    print('Building association matrix')
    matrix = matrix_dict.Matrix_Dict(lex_set=lex_set)  # lex_solution_set
    matrix.add_patterns_from_corpus(corpora.PAISA_RAW_INFO)
    matrix.add_patterns_from_corpus(corpora.DE_MAURO_POLIREMATICHE_INFO,
                                    weight=DE_MAURO_WEIGHT)
    corpora.addBigramFromPolirematicheInMatrix(matrix, DE_MAURO_WEIGHT)
    matrix.compute_association_scores()
    matrix.write_matrix_to_file(MATRIX_FILE)
    print('Eval')
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.GAME_SET_100_FILE,
                                             EVAL_WORD_GAME100_FILE)
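
compute_association_scores is not defined in these excerpts; a standard association measure for word co-occurrence matrices is pointwise mutual information (PMI). A minimal sketch under that assumption (the function and its input format are illustrative, not the repo's API):

import math
from collections import defaultdict

def pmi_scores(pair_counts):
    # PMI(x, y) = log( p(x, y) / (p(x) * p(y)) ), computed from raw counts
    # as log(c(x, y) * N / (c(x) * c(y))), with N the total pair count.
    total = sum(pair_counts.values())
    x_counts, y_counts = defaultdict(int), defaultdict(int)
    for (x, y), c in pair_counts.items():
        x_counts[x] += c
        y_counts[y] += c
    return {(x, y): math.log(c * total / (x_counts[x] * y_counts[y]))
            for (x, y), c in pair_counts.items()}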
Example n. 9
def objectDetect(inputFile, outputFile):
    '''
    Run object detection on the furniture in inputFile and save the result
    to outputFile, in JSON form. Currently only bin files are supported for
    input/output. If inputFile is a directory, outputFile is ignored.
    '''
    if "." not in inputFile:
        # inputFile is a directory; process every file inside it.
        files = utility.get_filenames(inputFile)
        for f in files:
            if "." not in f:
                continue

            coord, str_tag, number_tag, score = objectDetector.inference(
                detection_model, f)
            # Build the save file name.
            save_file_name = utility.add_name(f, "_od", extension="bin")
            dirs = save_file_name.split("/")
            save_image_name = ""
            for d in dirs[0:-1]:
                save_image_name += d + "/"
            save_image_name += f.split("/")[-1].split(".")[0] + "/"
            utility.make_dir(save_image_name)
            rect_files = []

            additional_infor = []
            for i in range(len(str_tag)):
                additional_infor.append(-1)
                rect_image = image_processing.get_rect_image(
                    f, int(coord[i][0]), int(coord[i][1]), int(coord[i][2]),
                    int(coord[i][3]))
                rect_image_name = save_image_name + f.split("/")[-1]
                rect_image_name = utility.add_name(rect_image_name,
                                                   "_" + str(i))
                rect_files.append(rect_image_name)
                utility.save_image(rect_image, rect_image_name)
            utility.save_result([
                coord, str_tag, number_tag, score, rect_files, additional_infor
            ], save_file_name)

    else:
        coord, str_tag, number_tag, score = objectDetector.inference(
            detection_model, inputFile)
        # Build the save file name.
        save_file_name = utility.add_name(inputFile, "_od", extension="bin")
        dirs = save_file_name.split("/")
        save_image_name = ""
        for d in dirs[0:-1]:
            save_image_name += d + "/"
        save_image_name += inputFile.split("/")[-1].split(".")[0] + "/"
        utility.make_dir(save_image_name)
        rect_files = []
        additional_infor = []
        for i in range(len(str_tag)):
            additional_infor.append(-1)
            rect_image = image_processing.get_rect_image(
                inputFile, int(coord[i][0]), int(coord[i][1]),
                int(coord[i][2]), int(coord[i][3]))
            rect_image_name = save_image_name + inputFile.split("/")[-1]
            rect_image_name = utility.add_name(rect_image_name, "_" + str(i))
            rect_files.append(rect_image_name)
            utility.save_image(rect_image, rect_image_name)
        utility.save_result(
            [coord, str_tag, number_tag, score, rect_files, additional_infor],
            outputFile)
def _generate_file_names(dataframe_dir):
    make_dir(dataframe_dir)
    return (os.path.join(dataframe_dir, 'train_sampled.pkl'),
            os.path.join(dataframe_dir, 'dev.pkl'),
            os.path.join(dataframe_dir, 'cost_versus_epoch.pkl'))
Example n. 11
        responsePattern.update(**responsePatternIn)
        TheLogger.debug("Incomming response pattern: \n" + \
                json_dumps(responsePattern))

        pyData_to_json_file(responsePattern, pathRespPatternFile)
    except Exception as e:
        TheLogger.error(str(e))


if __name__ == "__main__":
    """
    Main entry point.
    """
    pathTempDataDir = path_join(PATH_ROOT, "temp")
    pathRespPatternFile = path_join(pathTempDataDir, "response_pattern.json")
    make_dir(pathTempDataDir)
    TheLogger.init(pathTempDataDir, "server.log")

    mode = sys.argv[1]
    if mode == "start":
        serverHost = "127.0.0.1"
        try:
            serverPort = int(sys.argv[2])
        except Exception:
            # Without this exit, start() would be called with serverPort unset.
            print("Error: specify the port correctly.")
            sys.exit(1)
        start(serverHost, serverPort)
    elif mode == "set_response_pattern":
        pathRespPatternFileIn = sys.argv[2]
        flagAddData = bool(int(sys.argv[3]))
        set_response_pattern(pathRespPatternFileIn, flagAddData)
    elif mode == "get_response_pattern":