def main():
    """Run the Java NLP class selected by the input parameters and log timings.

    The function loads the environment variables, checks (through
    ``check_same_python_module_already_running``) for another running
    instance of this module, reads the input parameters and starts the Java
    class named by ``input_params['nlp_params'].java_class_name`` with the
    MongoDB, filter and NLP options as command-line arguments.

    The child process is started from an argument list (no shell), so paths
    containing spaces, such as a ``JAVA_HOME`` under ``Program Files``, reach
    the JVM as single arguments. A launch failure or a non-zero exit status is
    logged; it is not raised, which keeps the original best-effort behaviour.

    Raises:
        KeyError: if ``JAVA_HOME`` is not set, or an expected entry is missing
            from the input parameters.
    """
    # Local import so this block stays self-contained within the file.
    import subprocess

    # Stores the execution start time to calculate the time it takes for the module to execute.
    initial_time = time.time()

    # Logger.
    log = set_logger()
    log.debug("\n[START OF EXECUTION]")

    load_environment_variables()

    # Check if there is a running process that contains the name of this module.
    check_same_python_module_already_running(os.path.split(__file__))

    # Input parameters.
    input_params = get_input_params()
    mongo_params = input_params['mongo_params']
    filter_params = input_params['filter_params']
    nlp_params = input_params['nlp_params']

    # Java launcher: 'java.exe' on Windows, plain 'java' elsewhere.
    java_name = 'java.exe' if os.name == 'nt' else 'java'
    java_exe = Path(os.environ['JAVA_HOME']) / 'bin' / java_name

    # Command, as an argument list: each value is passed verbatim to the JVM,
    # without any shell word-splitting or quoting.
    cmd = [
        str(java_exe),
        '-cp', str(get_java_classpath()),
        str(nlp_params.java_class_name),
        '-host', str(mongo_params.host),
        '-port', str(mongo_params.port),
        '-dbName', str(mongo_params.db_name),
        '-collName', str(mongo_params.collection_name),
        '-startYear', str(filter_params.start_year),
        '-endYear', str(filter_params.end_year),
        '-textColumnName', str(filter_params.column_name),
        '-maxNumTokens', str(nlp_params.max_num_tokens),
        '-parserModel', str(nlp_params.parser_model),
        '-createTrees', str(nlp_params.get_trees),
        '-calcEmbeddings', str(nlp_params.get_embeddings),
        '-calcCoherence', str(nlp_params.get_coherence),
    ]
    log.info(f"Running command: '{' '.join(cmd)}'")

    # Run command. Failures are reported but do not abort the module.
    try:
        completed = subprocess.run(cmd, check=False)
    except OSError as err:
        log.error(f"Command could not be started: {err}")
    else:
        if completed.returncode != 0:
            log.error(f"Command finished with exit status {completed.returncode}")

    log.info("\n[END OF EXECUTION]")
    final_time = time.time()
    log.info(f"Total execution time = {((final_time - initial_time) / 60)} minutes")
import os import random import time import numpy as np import pandas as pd from pymongo import MongoClient from tqdm import tqdm from syn.helpers.environment import load_environment_variables from syn.helpers.logging import set_logger from syn.helpers.mongodb import get_default_mongo_client from syn.helpers.system import check_same_python_module_already_running load_environment_variables() log = set_logger() def get_input_params(): parser = argparse.ArgumentParser( description='Generate pairs for similar issues.') parser.add_argument('--db_name', default='gerrit', type=str, help='Gerrit database name.') parser.add_argument('--collection_name', default='eclipse_similarities', type=str, help='Gerrit similarities collection name.') parser.add_argument('--output_db_name',
def main():
    """Run ``UpdateVectorizedMongoDBCollection.py`` once per database and year.

    The databases are either the single ``corpus`` input parameter or, when
    that is an empty string, the comma-separated list in the
    ``EMBEDDING_MONGODB_DATABASE_NAME`` environment variable. For every
    database, each year in ``range(start_year, end_year)`` is processed as the
    interval ``year`` to ``year + 1``.

    Embeddings and coherence are only requested when trees are requested too.
    The parser model is ``corenlp`` when coherence is computed and
    ``srparser`` otherwise.

    Each child process is started from an argument list (no shell), so an
    interpreter path or environment value containing spaces is passed as one
    argument. A launch failure or a non-zero exit status is logged and the
    loop continues with the next year, as the original did.

    NOTE(review): the script name is relative, so it is resolved against the
    current working directory - confirm the module is always started from the
    directory that contains it.

    Raises:
        KeyError: if a required environment variable or input parameter is
            missing.
    """
    # Local import so this block stays self-contained within the file.
    import subprocess

    # Stores the execution start time to calculate the time it takes for the module to execute.
    initial_time = time.time()

    # Logger.
    log = set_logger()
    log.debug("\n[START OF EXECUTION]")

    load_environment_variables()

    # Check if there is a running process that contains the name of this module.
    check_same_python_module_already_running(os.path.split(__file__))

    # Years range.
    input_params = get_input_params()

    # Databases.
    if input_params['corpus'] != '':
        databases = [input_params['corpus']]
    else:
        databases = os.environ["EMBEDDING_MONGODB_DATABASE_NAME"].split(",")

    # Java class.
    java_class_name = "UpdateMongoDBNLPFields"

    # Control params. Embeddings and coherence both depend on the trees.
    with_trees = bool(input_params['get-trees'])
    with_embeddings = bool(input_params['get-embeddings']) and with_trees
    with_coherence = bool(input_params['get-coherence']) and with_trees

    control_args = [
        "--pm", "corenlp" if with_coherence else "srparser",
        "--get-trees" if with_trees else "--no-get-trees",
        "--get-embeddings" if with_embeddings else "--no-get-embeddings",
        "--get-coherence" if with_coherence else "--no-get-coherence",
    ]

    # Defines Python executable.
    python_exe = os.environ.get('PYTHON_EXECUTABLE', sys.executable)

    # Loop over databases and years, one child process per year.
    tokens_initial_time = time.time()
    log.info("Updating NLP fields ...")
    for db in databases:
        log.info(f"\nProcessing database: '{db}'.")
        for year in range(input_params['start_year'], input_params['end_year']):
            log.info(f"\n[FOR LOOP] Processing years: {year} - {year + 1}")
            cmd = [
                str(python_exe), "UpdateVectorizedMongoDBCollection.py",
                "--jcn", java_class_name,
                "--mh", os.environ['MONGO_HOST_IP'],
                "--mp", os.environ['MONGO_PORT'],
                "--db", str(db),
                "--c", os.environ['EMBEDDING_MONGODB_COLLECTION_NAME'],
                "--cl", os.environ['EMBEDDING_MONGODB_COLUMN_NAME'],
                "--sy", str(year),
                "--ey", str(year + 1),
                "--mnt", os.environ['EMBEDDING_MONGODB_MAX_NUM_TOKENS'],
                *control_args,
            ]

            # Run command. Failures are reported but do not stop the loop.
            log.info(f"Running command: '{' '.join(cmd)}'.")
            try:
                completed = subprocess.run(cmd, check=False)
            except OSError as err:
                log.error(f"Command could not be started: {err}")
            else:
                if completed.returncode != 0:
                    log.error(
                        f"Command for database '{db}', year {year} finished with "
                        f"exit status {completed.returncode}"
                    )

    log.info(f"Updating NLP fields total execution time = {((time.time() - tokens_initial_time) / 60)} minutes")

    log.debug("\n[END OF EXECUTION]")
    final_time = time.time()
    log.info(f"Total execution time = {((final_time - initial_time) / 60)} minutes")