def main(augur_url, host, port):
    """ Declares singular worker and creates the server and flask app that it will be running on """
    app = Flask(__name__)

    # Load credentials
    broker_host = read_config("Server", "host", "AUGUR_HOST", "0.0.0.0")
    broker_port = read_config("Server", "port", "AUGUR_PORT", 5000)
    database_host = read_config('Database', 'host', 'AUGUR_DB_HOST', 'host')
    worker_info = read_config('Workers', 'repo_info_worker', None, {})
    worker_port = worker_info['port'] if 'port' in worker_info else port

    # Probe upward from the configured port until we find one without a live worker
    while True:
        try:
            r = requests.get("http://{}:{}/AUGWOP/heartbeat".format(host, worker_port)).json()
            if 'status' in r:
                if r['status'] == 'alive':
                    worker_port += 1
        except:
            break

    logging.basicConfig(filename='worker_{}.log'.format(worker_port), filemode='w', level=logging.INFO)

    config = {
        "id": "com.augurlabs.core.repo_info_worker.{}".format(worker_port),
        "broker_port": broker_port,
        "broker_host": broker_host,
        "location": "http://{}:{}".format(read_config('Server', 'host', 'AUGUR_HOST', 'localhost'), worker_port),
        "host": database_host,
        "key": read_config("Database", "key", "AUGUR_GITHUB_API_KEY", "key"),
        "password": read_config('Database', 'password', 'AUGUR_DB_PASSWORD', 'password'),
        "port": read_config('Database', 'port', 'AUGUR_DB_PORT', 'port'),
        "user": read_config('Database', 'user', 'AUGUR_DB_USER', 'user'),
        "database": read_config('Database', 'name', 'AUGUR_DB_NAME', 'database'),
        "endpoint": "https://bestpractices.coreinfrastructure.org/projects.json",
        "display_name": "",
        "description": "",
        "required": 1,
        "type": "string"
    }

    # Create instance of the worker that will be running on this server with the specified config
    app.gh_repo_info_worker = GHRepoInfoWorker(config)

    create_server(app, None)
    logging.info("Starting Flask App with pid: " + str(os.getpid()) + "...")
    app.run(debug=app.debug, host=host, port=worker_port)

    if app.gh_repo_info_worker._child is not None:
        app.gh_repo_info_worker._child.terminate()

    # Tell the broker this worker is disconnecting
    try:
        requests.post('http://{}:{}/api/unstable/workers/remove'.format(broker_host, broker_port),
                      json={"id": config['id']})
    except:
        pass

    logging.info("Killing Flask App: " + str(os.getpid()))
    os.kill(os.getpid(), 9)
def test_read_config_no_exception():
    db_name = read_config('Database', 'user', 'AUGUR_DB_USER', 'augur', config_file_path="augur.config.json")
    assert db_name == "augur"
def __init__(self):
    self.upstream_db = 7
    self.cursor = None
    self.cursor_people = None
    self.db = None
    self.db_people = None

    worker_options = read_config("Workers", "facade_worker", None, {})
    if 'repo_directory' in worker_options:
        self.repo_base_directory = worker_options['repo_directory']
    else:
        self.log_activity(
            'Error',
            "Please specify a 'repo_directory' parameter"
            " in your 'Workers' -> 'facade_worker' object in your config "
            "to the directory in which you want to clone repos. Exiting..."
        )
        sys.exit(1)

    self.tool_source = '\'FacadeAugur\''
    self.tool_version = '\'0.0.1\''
    self.data_source = '\'git_repository\''

    # Figure out how much we're going to log
    logging.basicConfig(filename='worker_{}.log'.format(worker_options['port']),
                        filemode='w', level=logging.INFO)
    self.log_level = None  #self.get_setting('log_level')
def log_activity(self, level, status):
    # Log an activity based upon urgency and user's preference. If the log level is
    # "Debug", then just print it and don't save it in the database.
    log_options = ('Error', 'Quiet', 'Info', 'Verbose', 'Debug')
    logging.info("* %s\n" % status)
    if self.log_level == 'Debug' and level == 'Debug':
        return

    #if log_options.index(level) <= log_options.index(self.log_level):
    query = ("INSERT INTO utility_log (level,status) VALUES (%s,%s)")
    try:
        self.cursor.execute(query, (level, status))
        self.db.commit()
    except Exception as e:
        logging.info('Error encountered: {}\n'.format(e))

# Set up the database (this fragment belongs to the worker's initialization,
# where `self` is the Facade config object)
db_user = read_config('Database', 'user', 'AUGUR_DB_USER', 'augur')
db_pass = read_config('Database', 'password', 'AUGUR_DB_PASSWORD', 'augur')
db_name = read_config('Database', 'name', 'AUGUR_DB_NAME', 'augur')
db_host = read_config('Database', 'host', 'AUGUR_DB_HOST', 'localhost')
db_port = read_config('Database', 'port', 'AUGUR_DB_PORT', 5432)
db_user_people = db_user
db_pass_people = db_pass
db_name_people = db_name
db_host_people = db_host
db_port_people = db_port

# Open a general-purpose connection
db, cursor = self.database_connection(db_host, db_user, db_pass, db_name, db_port, False, False)
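# log_activity assumes a utility_log table exists in the database. A hypothetical
# minimal schema that would satisfy the INSERT above; the column types and the
# id/timestamp columns are assumptions, not taken from the source:

create_utility_log = """
CREATE TABLE IF NOT EXISTS utility_log (
    id BIGSERIAL PRIMARY KEY,                       -- assumed surrogate key
    level VARCHAR(8),                               -- one of the log_options values
    status VARCHAR,                                 -- the logged message
    attempted TIMESTAMP DEFAULT CURRENT_TIMESTAMP   -- assumed audit column
)
"""
# e.g. cursor.execute(create_utility_log); db.commit()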
def commit_model(self):
    # Figure out what we need to do
    limited_run = read_config("Facade", name="limited_run", default=0)
    delete_marked_repos = read_config("Facade", name="delete_marked_repos", default=0)
    pull_repos = read_config("Facade", name="pull_repos", default=0)
    clone_repos = read_config("Facade", name="clone_repos", default=1)
    check_updates = read_config("Facade", name="check_updates", default=0)
    force_updates = read_config("Facade", name="force_updates", default=0)
    run_analysis = read_config("Facade", name="run_analysis", default=0)
    force_analysis = read_config("Facade", name="force_analysis", default=0)
    nuke_stored_affiliations = read_config("Facade", name="nuke_stored_affiliations", default=0)
    fix_affiliations = read_config("Facade", name="fix_affiliations", default=1)
    force_invalidate_caches = read_config("Facade", name="force_invalidate_caches", default=0)
    rebuild_caches = read_config("Facade", name="rebuild_caches", default=1)
    #if abs((datetime.datetime.strptime(self.cfg.get_setting('aliases_processed')[:-3],
    #   '%Y-%m-%d %I:%M:%S.%f') - datetime.datetime.now()).total_seconds()) // 3600 > int(self.cfg.get_setting(
    #   'update_frequency')) else 0
    create_xlsx_summary_files = read_config("Facade", name="create_xlsx_summary_files", default=0)
    multithreaded = read_config("Facade", name="multithreaded", default=1)

    opts, args = getopt.getopt(sys.argv[1:], 'hdpcuUaAmnfIrx')

    for opt in opts:
        if opt[0] == '-h':
            print("\nfacade-worker.py does everything by default except invalidating caches\n"
                  "and forcing updates, unless invoked with one of the following options.\n"
                  "In those cases, it will only do what you have selected.\n\n"
                  "Options:\n"
                  "   -d  Delete marked repos\n"
                  "   -c  Run 'git clone' on new repos\n"
                  "   -u  Check if any repos should be marked for updating\n"
                  "   -U  Force all repos to be marked for updating\n"
                  "   -p  Run 'git pull' on repos\n"
                  "   -a  Analyze git repos\n"
                  "   -A  Force all repos to be analyzed\n"
                  "   -m  Disable multithreaded mode (but why?)\n"
                  "   -n  Nuke stored affiliations (if mappings modified by hand)\n"
                  "   -f  Fill empty affiliations\n"
                  "   -I  Invalidate caches\n"
                  "   -r  Rebuild unknown affiliation and web caches\n"
                  "   -x  Create Excel summary files\n\n")
            sys.exit(0)

        elif opt[0] == '-d':
            delete_marked_repos = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: delete marked repos.')

        elif opt[0] == '-c':
            clone_repos = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: clone new repos.')

        elif opt[0] == '-u':
            check_updates = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: checking for repo updates')

        elif opt[0] == '-U':
            force_updates = 1
            self.cfg.log_activity('Info', 'Option set: forcing repo updates')

        elif opt[0] == '-p':
            pull_repos = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: update repos.')

        elif opt[0] == '-a':
            run_analysis = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: running analysis.')

        elif opt[0] == '-A':
            force_analysis = 1
            run_analysis = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: forcing analysis.')

        elif opt[0] == '-m':
            multithreaded = 0
            self.cfg.log_activity('Info', 'Option set: disabling multithreading.')

        elif opt[0] == '-n':
            nuke_stored_affiliations = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: nuking all affiliations')

        elif opt[0] == '-f':
            fix_affiliations = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: fixing affiliations.')

        elif opt[0] == '-I':
            force_invalidate_caches = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: Invalidate caches.')

        elif opt[0] == '-r':
            rebuild_caches = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: rebuilding caches.')

        elif opt[0] == '-x':
            create_xlsx_summary_files = 1
            limited_run = 1
            self.cfg.log_activity('Info', 'Option set: creating Excel summary files.')

    # Get the location of the directory where git repos are stored
    repo_base_directory = self.cfg.repo_base_directory

    # Determine if it's safe to start the script
    current_status = self.cfg.get_setting('utility_status')

    if current_status != 'Idle':
        self.cfg.log_activity('Error', 'Something is already running, aborting maintenance '
                              'and analysis.\nIt is unsafe to continue.')
        # sys.exit(1)

    if len(repo_base_directory) == 0:
        self.cfg.log_activity('Error', 'No base directory. It is unsafe to continue.')
        self.cfg.update_status('Failed: No base directory')
        sys.exit(1)

    # Begin working
    start_time = time.time()
    self.cfg.log_activity('Quiet', 'Running facade-worker')

    if not limited_run or (limited_run and delete_marked_repos):
        git_repo_cleanup(self.cfg)

    if not limited_run or (limited_run and clone_repos):
        git_repo_initialize(self.cfg)

    if not limited_run or (limited_run and check_updates):
        check_for_repo_updates(self.cfg)

    if force_updates:
        force_repo_updates(self.cfg)

    if not limited_run or (limited_run and pull_repos):
        git_repo_updates(self.cfg)

    if force_analysis:
        force_repo_analysis(self.cfg)

    if not limited_run or (limited_run and run_analysis):
        analysis(self.cfg, multithreaded)

    if nuke_stored_affiliations:
        nuke_affiliations(self.cfg)

    if not limited_run or (limited_run and fix_affiliations):
        fill_empty_affiliations(self.cfg)

    if force_invalidate_caches:
        invalidate_caches(self.cfg)

    if not limited_run or (limited_run and rebuild_caches):
        rebuild_unknown_affiliation_and_web_caches(self.cfg)

    if not limited_run or (limited_run and create_xlsx_summary_files):
        self.cfg.log_activity('Info', 'Creating summary Excel files')
        # from excel_generators import *
        self.cfg.log_activity('Info', 'Creating summary Excel files (complete)')

    # All done
    self.cfg.update_status('Idle')
    self.cfg.log_activity('Quiet', 'facade-worker.py completed')

    elapsed_time = time.time() - start_time
    print('\nCompleted in %s\n' % datetime.timedelta(seconds=int(elapsed_time)))

    self.cfg.cursor.close()
    self.cfg.cursor_people.close()
    self.cfg.db.close()
    self.cfg.db_people.close()
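# A hypothetical invocation of the command-line interface parsed above, using
# the flags documented in the -h text (the invocation itself is illustrative):
#
#     $ python facade-worker.py -c -p
#
# This sets clone_repos = 1 and pull_repos = 1; every flag except -U and -m also
# sets limited_run = 1, so only the selected phases (clone and pull) execute
# instead of the full default pipeline.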
def test_read_config_exception():
    with pytest.raises(AttributeError):
        db_name = read_config('Server', 'username')
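# Together, test_read_config_no_exception and test_read_config_exception pin down
# read_config's contract: return the configured value, fall back to an environment
# variable or the supplied default, and raise when nothing is available. Below is
# a minimal sketch of that lookup order, not Augur's actual implementation; the
# name read_config_sketch and the exact precedence are assumptions.
import json
import os

def read_config_sketch(section, name=None, environment_variable=None,
                       default=None, config_file_path='augur.config.json'):
    value = None
    # 1. An environment variable, if one was named and is set, wins
    if environment_variable is not None:
        value = os.getenv(environment_variable)
    # 2. Otherwise look the setting up in the JSON config file
    if value is None:
        try:
            with open(config_file_path) as f:
                value = json.load(f).get(section, {}).get(name)
        except (OSError, ValueError):
            value = None
    # 3. Otherwise fall back to the caller-supplied default
    if value is None:
        value = default
    # 4. No value anywhere: surface an error, as test_read_config_exception expects
    if value is None:
        raise AttributeError("no value for [{}] {}".format(section, name))
    return value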
def main(augur_url, host, port):
    """ Declares singular worker and creates the server and flask app that it will be running on """
    app = Flask(__name__)

    # Load credentials
    broker_host = read_config("Server", "host", "AUGUR_HOST", "0.0.0.0")
    broker_port = read_config("Server", "port", "AUGUR_PORT", 5000)
    database_host = read_config('Database', 'host', 'AUGUR_DB_HOST', 'host')
    worker_info = read_config('Workers', 'insight_worker', None, {})
    worker_port = worker_info['port'] if 'port' in worker_info else port

    # Probe upward from the configured port until we find one without a live worker
    while True:
        try:
            r = requests.get("http://{}:{}/AUGWOP/heartbeat".format(host, worker_port)).json()
            if 'status' in r:
                if r['status'] == 'alive':
                    worker_port += 1
        except:
            break

    logging.basicConfig(filename='worker_{}.log'.format(worker_port), filemode='w', level=logging.INFO)

    config = {
        "id": "com.augurlabs.core.insight_worker.{}".format(worker_port),
        "broker_port": broker_port,
        "broker_host": broker_host,
        "location": "http://{}:{}".format(read_config('Server', 'host', 'AUGUR_HOST', 'localhost'), worker_port),
        "host": database_host,
        "key": read_config("Database", "key", "AUGUR_GITHUB_API_KEY", "key"),
        "password": read_config('Database', 'password', 'AUGUR_DB_PASSWORD', 'password'),
        "port": read_config('Database', 'port', 'AUGUR_DB_PORT', 'port'),
        "user": read_config('Database', 'user', 'AUGUR_DB_USER', 'user'),
        "database": read_config('Database', 'name', 'AUGUR_DB_NAME', 'database'),
        "endpoint": "https://bestpractices.coreinfrastructure.org/projects.json",
        "anomaly_days": worker_info['anomaly_days'] if 'anomaly_days' in worker_info else 2,
        "training_days": worker_info['training_days'] if 'training_days' in worker_info else 365,
        "confidence_interval": worker_info['confidence_interval'] if 'confidence_interval' in worker_info else .95,
        "contamination": worker_info['contamination'] if 'contamination' in worker_info else 0.041,
        'metrics': worker_info['metrics'] if 'metrics' in worker_info else {
            "issues-new": "issues",
            "code-changes": "commit_count",
            "code-changes-lines": "added",
            "reviews": "pull_requests",
            "contributors-new": "new_contributors"
        }
    }

    # Create instance of the worker that will be running on this server with the specified config
    app.insight_worker = InsightWorker(config)

    create_server(app, None)
    print("Starting Flask App on host {} with port {} with pid: ".format(broker_host, worker_port) + str(os.getpid()) + "...")
    app.run(debug=app.debug, host=host, port=worker_port)

    print("Killing Flask App: {} and telling broker that this worker is disconnected.".format(str(os.getpid())))
    try:
        logging.info("Sending disconnected message to broker... @ -> {} with info: {}\n".format(
            'http://{}:{}/api/unstable/workers'.format(config['broker_host'], config['broker_port']), config))
        requests.post('http://{}:{}/api/unstable/workers/remove'.format(
            config['broker_host'], config['broker_port']), json=config)  # disconnect message
    except Exception as e:
        logging.info("Ran into error: {}".format(e))
        logging.info("Broker's port is busy, worker will not be able to accept tasks, "
                     "please restart Augur if you want this worker to attempt connection again.")
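# The insight worker's tunables above come from the "Workers" -> "insight_worker"
# object in augur.config.json. A hypothetical block that this main() would pick
# up; every value below is illustrative, not taken from the source:
#
# "Workers": {
#     "insight_worker": {
#         "port": 51252,
#         "anomaly_days": 2,
#         "training_days": 365,
#         "confidence_interval": 0.95,
#         "contamination": 0.041,
#         "metrics": {"issues-new": "issues", "code-changes": "commit_count"}
#     }
# }
#
# Any key omitted here simply falls back to the inline default in the config dict.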
def analyze_commit(cfg, repo_id, repo_loc, commit, multithreaded):
    # This function analyzes a given commit, counting the additions, removals, and
    # whitespace changes. It collects all of the metadata about the commit, and
    # stashes it in the database. A new database connection is opened each time in
    # case we are running in multithreaded mode, since MySQL cursors are not
    # currently threadsafe.

    ### Local helper functions ###

    def check_swapped_emails(name, email):
        # Sometimes people mix up their name and email in their git settings
        if name.find('@') >= 0 and email.find('@') == -1:
            cfg.log_activity('Debug', 'Found swapped email/name: %s/%s' % (email, name))
            return email, name
        else:
            return name, email

    def strip_extra_amp(email):
        # Some repos have multiple ampersands, which really messes up domain pattern
        # matching. This extra info is not used, so we discard it.
        if email.count('@') > 1:
            cfg.log_activity('Debug', 'Found extra @: %s' % email)
            return email[:email.find('@', email.find('@') + 1)]
        else:
            return email

    def discover_alias(email):
        # Match aliases with their canonical email
        fetch_canonical = ("SELECT canonical_email "
                           "FROM contributors_aliases "
                           "WHERE alias_email=%s "
                           "AND cntrb_active = 1")
        cursor_people_local.execute(fetch_canonical, (email, ))
        db_people_local.commit()
        canonical = list(cursor_people_local)
        if canonical:
            for email in canonical:
                return email[0]
        else:
            return email

    def update_contributors(author_em, committer_em, auth_nm, cmtr_nm):
        # There is a committer and an author on each commit, but (ideally) only one
        # record per email address in the contributors table. So, for each email
        # address, we need to check whether it already exists in that table.

        def contributor_exists(some_email):
            # Check whether an email already exists in the contributors table
            some_email = some_email.replace("'", "")
            email_check = (
                """SELECT cntrb_email, tool_source, tool_version, data_source
                FROM contributors WHERE cntrb_email = '{}'""".format(some_email))
            cursor_local.execute(email_check)
            if cursor_local.fetchone() is not None:
                db_local.commit()
                return True
            else:
                return False

        # SQL to update the contributors table
        cntrb = (
            "INSERT INTO contributors "
            "(cntrb_email,cntrb_canonical,cntrb_full_name,tool_source, tool_version, data_source) "
            "VALUES (%s,%s,%s,'FacadeAugur','0.0.1','git_repository')")

        ## Logic block for updating contributors.
        if contributor_exists(author_em):
            cfg.log_activity(
                'Info',
                'Author contributor record already exists: {}'.format(author_em))
        else:
            # Add a contributor record for the author
            cursor_local.execute(
                cntrb, (author_em, discover_alias(author_em), str(auth_nm)))
            db_local.commit()
            cfg.log_activity(
                'Info',
                'Stored author contributor with email: {}'.format(author_em))

        if contributor_exists(committer_em):
            cfg.log_activity(
                'Info',
                'Committer contributor record already exists: {}'.format(committer_em))
        else:
            # Add a contributor record for the committer
            cursor_local.execute(
                cntrb, (committer_em, discover_alias(committer_em), str(cmtr_nm)))
            db_local.commit()
            cfg.log_activity(
                'Info',
                'Stored committer contributor with email: {}'.format(committer_em))

    def store_commit(repos_id, commit, filename,
                     author_name, author_email, author_date, author_timestamp,
                     committer_name, committer_email, committer_date, committer_timestamp,
                     added, removed, whitespace):
        # Fix some common issues in git commit logs and store data.
        # Sometimes git is misconfigured and name/email get swapped
        author_name, author_email = check_swapped_emails(author_name, author_email)
        committer_name, committer_email = check_swapped_emails(committer_name, committer_email)

        # Some systems append extra info after a second @
        author_email = strip_extra_amp(author_email)
        committer_email = strip_extra_amp(committer_email)

        store = ("""INSERT INTO commits (repo_id,cmt_commit_hash,cmt_filename,
            cmt_author_name,cmt_author_raw_email,cmt_author_email,cmt_author_date,cmt_author_timestamp,
            cmt_committer_name,cmt_committer_raw_email,cmt_committer_email,cmt_committer_date,cmt_committer_timestamp,
            cmt_added,cmt_removed,cmt_whitespace, cmt_date_attempted, tool_source, tool_version, data_source)
            VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""")

        try:
            cursor_local.execute(store, (
                repos_id, str(commit), filename,
                str(author_name), author_email, discover_alias(author_email),
                author_date, author_timestamp,
                committer_name, committer_email, discover_alias(committer_email),
                committer_date, committer_timestamp,
                added, removed, whitespace, committer_date,
                cfg.tool_source, cfg.tool_version, cfg.data_source,
            ))
            db_local.commit()
        except:
            try:
                cfg.log_activity(
                    'Info',
                    """Timezone error caught, inspect values:
                    INSERT INTO commits (repo_id,cmt_commit_hash,cmt_filename,
                    cmt_author_name,cmt_author_raw_email,cmt_author_email,cmt_author_date,cmt_author_timestamp,
                    cmt_committer_name,cmt_committer_raw_email,cmt_committer_email,cmt_committer_date,cmt_committer_timestamp,
                    cmt_added,cmt_removed,cmt_whitespace, cmt_date_attempted, tool_source, tool_version, data_source)
                    VALUES ({},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{})""".format(
                        repos_id, str(commit), filename,
                        str(author_name), author_email, discover_alias(author_email),
                        author_date, author_timestamp,
                        committer_name, committer_email, discover_alias(committer_email),
                        committer_date, committer_timestamp,
                        added, removed, whitespace, committer_date,
                        cfg.tool_source, cfg.tool_version, cfg.data_source))
            except:
                cfg.log_activity('Info', 'Something wrong in error log for timezone error')

        cfg.log_activity('Debug', 'Stored commit: %s' % commit)

        # Check if email already exists in db
        # email_check = ("""SELECT cntrb_email, tool_source, tool_version, data_source
        #   FROM contributors WHERE cntrb_email = {augur_email} OR cntrb_email = {committer_email}""")

        ## Commented out so as to not update contributors
        ## sean: 11/6/2019
        ## Goal: Address with the contributors model worker
        # try:
        #   update_contributors(author_email, committer_email, author_name, committer_name)
        # except Exception:
        #   #print(e)
        #   cfg.log_activity('Info', str(traceback.print_exc()))

    ### The real function starts here ###

    header = True
    filename = ''
    added = 0
    removed = 0
    whitespace = 0

    db_user = read_config('Database', 'user', 'AUGUR_DB_USER', 'augur')
    db_pass = read_config('Database', 'password', 'AUGUR_DB_PASSWORD', 'augur')
    db_name = read_config('Database', 'name', 'AUGUR_DB_NAME', 'augur')
    db_host = read_config('Database', 'host', 'AUGUR_DB_HOST', 'localhost')
    db_port = read_config('Database', 'port', 'AUGUR_DB_PORT', 5432)
    db_user_people = db_user
    db_pass_people = db_pass
    db_name_people = db_name
    db_host_people = db_host
    db_port_people = db_port

    # Set up new threadsafe database connections if multithreading. Otherwise
    # use the global database connections so we don't incur a performance
    # penalty.
    if multithreaded:
        db_local, cursor_local = cfg.database_connection(
            db_host, db_user, db_pass, db_name, db_port, False, True)
        db_people_local, cursor_people_local = cfg.database_connection(
            db_host_people, db_user_people, db_pass_people,
            db_name_people, db_port_people, True, True)
    else:
        db_local = cfg.db
        cursor_local = cfg.cursor
        db_people_local = cfg.db_people
        cursor_people_local = cfg.cursor_people

    # Read the git log
    git_log = subprocess.Popen([
        "git --git-dir %s log -p -M %s -n1 "
        "--pretty=format:'"
        "author_name: %%an%%nauthor_email: %%ae%%nauthor_date:%%ai%%n"
        "committer_name: %%cn%%ncommitter_email: %%ce%%ncommitter_date: %%ci%%n"
        "parents: %%p%%nEndPatch' " % (repo_loc, commit)
    ], stdout=subprocess.PIPE, shell=True)

    # Stash the commit we're going to analyze so we can back it out if something
    # goes wrong later.
    store_working_commit = ("INSERT INTO working_commits "
                            "(repos_id,working_commit) VALUES (%s,%s)")
    cursor_local.execute(store_working_commit, (repo_id, commit))
    db_local.commit()

    cfg.log_activity('Debug', 'Stored working commit and analyzing: %s' % commit)

    for line in git_log.stdout.read().decode("utf-8", errors="ignore").split(os.linesep):
        if len(line) > 0:
            if line.find('author_name:') == 0:
                author_name = line[13:]
                continue
            if line.find('author_email:') == 0:
                author_email = line[14:]
                continue
            if line.find('author_date:') == 0:
                author_date = line[12:22]
                author_timestamp = line[12:]
                continue
            if line.find('committer_name:') == 0:
                committer_name = line[16:]
                continue
            if line.find('committer_email:') == 0:
                committer_email = line[17:]
                continue
            if line.find('committer_date:') == 0:
                committer_date = line[16:26]
                committer_timestamp = line[16:]
                continue
            if line.find('parents:') == 0:
                if len(line[9:].split(' ')) == 2:
                    # We found a merge commit, which won't have a filename
                    filename = '(Merge commit)'
                    added = 0
                    removed = 0
                    whitespace = 0
                continue
            if line.find('--- a/') == 0:
                if filename == '(Deleted) ':
                    filename = filename + line[6:]
                continue
            if line.find('+++ b/') == 0:
                if not filename.find('(Deleted) ') == 0:
                    filename = line[6:]
                continue
            if line.find('rename to ') == 0:
                filename = line[10:]
                continue
            if line.find('deleted file ') == 0:
                filename = '(Deleted) '
                continue
            if line.find('diff --git') == 0:
                # Git only displays the beginning of a file in a patch, not
                # the end. We need some kludgery to discern where one starts
                # and one ends. This is the last line always separating
                # files in commits. But we only want to do it for the second
                # time onward, since the first time we hit this line it'll be
                # right after parsing the header and there won't be any useful
                # information contained in it.
                if not header:
                    store_commit(repo_id, commit, filename,
                                 author_name, author_email,
                                 author_date, author_timestamp,
                                 committer_name, committer_email,
                                 committer_date, committer_timestamp,
                                 added, removed, whitespace)
                header = False

                # Reset stats and prepare for the next section
                whitespaceCheck = []
                resetRemovals = True
                filename = ''
                added = 0
                removed = 0
                whitespace = 0
                continue

            # Count additions and removals and look for whitespace changes
            if not header:
                if line[0] == '+':
                    # First check if this is a whitespace change
                    if len(line.strip()) == 1:
                        # Line with zero length
                        whitespace += 1
                    else:
                        # Compare against removals, detect whitespace changes
                        whitespaceChange = False
                        for check in whitespaceCheck:
                            # Mark matches of non-trivial length
                            if line[1:].strip() == check and len(line[1:].strip()) > 8:
                                whitespaceChange = True
                        if whitespaceChange:
                            # One removal was whitespace, back it out
                            removed -= 1
                            whitespace += 1
                            # Remove the matched line
                            whitespaceCheck.remove(check)
                        else:
                            # Did not trigger whitespace criteria
                            added += 1
                    # Once we hit an addition, next removal line will be new.
                    # At that point, start a new collection for checking.
                    resetRemovals = True
                if line[0] == '-':
                    removed += 1
                    if resetRemovals:
                        whitespaceCheck = []
                        resetRemovals = False
                    # Store the line to check next add lines for a match
                    whitespaceCheck.append(line[1:].strip())

    # Store the last stats from the git log
    store_commit(repo_id, commit, filename,
                 author_name, author_email,
                 author_date, author_timestamp,
                 committer_name, committer_email,
                 committer_date, committer_timestamp,
                 added, removed, whitespace)

    # Remove the working commit.
    try:
        remove_commit = ("DELETE FROM working_commits "
                         "WHERE repos_id = %s AND working_commit = %s")
        cursor_local.execute(remove_commit, (repo_id, commit))
        db_local.commit()
        cfg.log_activity('Debug', 'Completed and removed working commit: %s' % commit)
    except:
        cfg.log_activity('Info', 'Working Commit: %s' % commit)

    # If multithreading, clean up the local database
    if multithreaded:
        cursor_local.close()
        cursor_people_local.close()
        db_local.close()
        db_people_local.close()
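# The +/- counting above is the subtlest part of analyze_commit: a removed line
# that reappears (non-trivially long, more than 8 stripped characters) as an
# added line is counted as a whitespace change rather than as one removal plus
# one addition. A minimal standalone sketch of that heuristic, for illustration
# only; the function name and its isolation from the database are assumptions:

def classify_diff_lines(diff_lines):
    # Returns (added, removed, whitespace) for a list of patch body lines.
    added = removed = whitespace = 0
    removals = []  # stripped text of the removals in the current hunk
    reset_removals = True
    for line in diff_lines:
        if not line:
            continue
        if line[0] == '-':
            removed += 1
            if reset_removals:
                removals = []
                reset_removals = False
            removals.append(line[1:].strip())
        elif line[0] == '+':
            stripped = line[1:].strip()
            if len(line.strip()) == 1:
                whitespace += 1       # bare '+' line: pure whitespace addition
            elif stripped in removals and len(stripped) > 8:
                removed -= 1          # the earlier removal was only reformatting
                whitespace += 1
                removals.remove(stripped)
            else:
                added += 1
            reset_removals = True     # the next '-' starts a fresh comparison set
    return added, removed, whitespace

# For example, a diff that re-indents one long line and adds one new line:
# classify_diff_lines(['-    total = compute_total(items)',
#                      '+        total = compute_total(items)',
#                      '+    print(total)'])
# yields (1, 0, 1): one real addition, one whitespace-only change.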
def time_series_metrics(self, entry_info, repo_id):
    training_days = 365
    repo_id = 25432  # NOTE: hardcoded, overriding the repo_id parameter
    augur_api_host = read_config(
        "Server", "host", "AUGUR_HOST", "0.0.0.0",
        config_file_path='/Users/pratikmishra/augur/augur.config.json')
    augur_api_port = read_config(
        "Server", "port", "AUGUR_PORT", 5000,
        config_file_path='/Users/pratikmishra/augur/augur.config.json')
    base_url = 'http://{}:{}/api/unstable/repo-groups/20/repos/{}/'.format(
        augur_api_host, augur_api_port, repo_id)

    # Build a frame with one row per calendar day across the training window
    begin_date = datetime.datetime.now().replace(
        hour=0, minute=0, second=0, microsecond=0) - datetime.timedelta(days=training_days)
    index = pd.date_range(begin_date, periods=training_days, freq='D')
    df = pd.DataFrame(index)
    df.columns = ['date']
    df['date'] = df['date'].astype(str)

    for endpoint in time_series:
        print(endpoint)
        url = base_url + endpoint
        print("Hitting endpoint: " + url + "\n")
        try:
            data = requests.get(url=url).json()
            print(data)
        except:
            data = json.loads(json.dumps(requests.get(url=url).text))

        if len(data) == 0:
            print("Endpoint with url: {} returned an empty response. "
                  "Moving on to next endpoint.\n".format(url))
            continue
        if 'date' not in data[0]:
            logging.info("Endpoint {} is not a timeseries, "
                         "moving to next endpoint.\n".format(endpoint))
            continue

        metric_df = pd.DataFrame.from_records(data)
        metric_df['date'] = pd.to_datetime(metric_df['date']).dt.date
        metric_df['date'] = metric_df['date'].astype(str)

        # Drop repo/repo-group identifier columns before merging
        extra = ['repo', 'rg']
        for column in metric_df.columns:
            if any(x in column for x in extra):
                metric_df.drop(column, axis=1, inplace=True)

        # Left-merge the endpoint's metrics onto the daily index and
        # prefix the new columns with the endpoint name
        df = pd.DataFrame(pd.merge(df, metric_df.loc[:, metric_df.columns],
                                   how='left', on='date'))
        metric_df.drop('date', axis=1, inplace=True)
        df.rename(columns={i: "{} _ {}".format(endpoint, i)
                           for i in metric_df.columns}, inplace=True)

    df = df.fillna(0)
    #df = df.groupby(df['date']).sum()

    time_series_LSTM_model(self, entry_info, repo_id, df)
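# The core data-shaping move above is: build a spine with one row per calendar
# day, then left-merge each endpoint's records onto it so every metric is aligned
# by date and missing days become NaN (later filled with 0). A self-contained
# sketch of that pattern with made-up data; the endpoint name, column name, and
# values below are illustrative only:

import pandas as pd

spine = pd.DataFrame({'date': pd.date_range('2020-01-01', periods=5, freq='D').astype(str)})
issues = pd.DataFrame({'date': ['2020-01-02', '2020-01-04'], 'issues': [3, 1]})

# Left-merge keeps every day; rename mimics the "<endpoint> _ <column>" scheme
merged = spine.merge(issues, how='left', on='date')
merged = merged.rename(columns={'issues': 'issues-new _ issues'}).fillna(0)
print(merged)
# Each day now has a row; days without API data carry 0 instead of being absent.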