# Register the NPROC, max_lines and db_cred_file command-line options on the
# parser created earlier in this script.
parser.add_argument("-p", "--NPROC", type=int, default=pytheas.available_cpu_count())
parser.add_argument("-m", "--max_lines", type=int, default=10000)
parser.add_argument("-c", "--db_cred_file", default="database_credentials.json")

# Parse the command line once; the result is reused for everything below.
args = parser.parse_args(sys.argv[1:])

# Read the credentials from the JSON file named by --db_cred_file.
with open(args.db_cred_file) as f:
    credentials = json.load(f)

# Database connection credentials
db_cred = DotMap()
db_cred.user = credentials["user"]
db_cred.password = credentials["password"]
db_cred.database = credentials["ground_truth_db"]
db_cred.opendata_database = credentials["profile_db"]
db_cred.port = credentials["port"]

# Instantiate the model and load the weights file given on the command line.
Pytheas = pytheas.PYTHEAS()
Pytheas.load_weights(args.weights)

# NOTE(review): process_endpoint is called on the `pytheas` module, and the
# `Pytheas` instance with the loaded weights is not passed to it -- confirm
# this is intended.
pytheas.process_endpoint(
    args.portals,
    db_cred,
    NPROC=args.NPROC,
    max_lines=args.max_lines,
)
# Create the 'add_nulls' and 'add_noise' directories if they are missing.
# exist_ok=True replaces the separate existence check, so there is no window
# between checking and creating in which another process could create them.
os.makedirs('add_nulls', exist_ok=True)
os.makedirs('add_noise', exist_ok=True)

# Echo the run configuration.
print(f'inject_percent_null={inject_percent_null}')
print(f'inject_percent_outlier={inject_percent_outlier}')
print(f'num_processors={num_processors}')

k_folds = args.k_folds

# Database connection credentials; the password is prompted for interactively
# rather than taken from the command line.
db_cred = DotMap()
db_cred.user = args.user
db_cred.database = args.database
db_cred.port = args.port
db_cred.password = getpass(prompt=f'Please enter password for user {db_cred.user} on database {db_cred.database}:')

max_lines = 100

# Run the k-fold evaluation with a fresh PYTHEAS instance.
pat_classifier = pat.PYTHEAS()
average_results = eval_pat_line_kfold(
    pat_classifier,
    k_folds,
    db_cred,
    top_level_dir,
    inject_percent_null,
    inject_percent_outlier,
    num_processors,
    max_lines,
)
# Register the port, num_processors and top_level_dir command-line options on
# the parser created earlier in this script.
parser.add_argument("-p", "--port", default=5532, help="port that postgresql database listens to")
parser.add_argument("-n", "--num_processors", default = 64, type=int, help="number of processors to be used")
parser.add_argument("-t", "--top_level_dir", default="/home/christina/OPEN_DATA_CRAWL_2018", help="path to Open Data Crawl")

args = parser.parse_args(sys.argv[1:])

# Never request more workers than pytheas reports as available.
num_processors = min(args.num_processors, pytheas.available_cpu_count())
top_level_dir = args.top_level_dir

# Database connection credentials
db_cred = DotMap()
db_cred.user = args.user
db_cred.database = args.database
db_cred.port = args.port
db_cred.password = ''

pytheas_model = pytheas.PYTHEAS()
pytheas_model.collect_rule_activation(db_cred, num_processors, top_level_dir)

print('\nLoading CACHED Training Data...')
con = connect(
    dbname=db_cred.database,
    user=db_cred.user,
    host='localhost',
    password=db_cred.password,
    port=db_cred.port,
)
# Close the connection even if the query raises, so a failed read does not
# leave the connection open.
try:
    undersampled_cell_data = pd.read_sql_query(
        sql="SELECT * FROM pat_data_cell_rules WHERE undersample=True",
        con=con,
    )
finally:
    con.close()