    def __init__(self, parent):
        wx.Frame.__init__(self, parent, title="TradeGame")
        utilities = Utilities()
        self.utilities = utilities
        self.game_interface = Game_interface(utilities)
        self.run_gui()
        self.content = self.game_interface.utilities.text_package
        self.content_text.SetLabel(self.content)
        self.targeted_function = None
            self.world.trade()
            sensible = 1
        if action == 'travel':
            self.world.travel()
            sensible = 1
        if action == 'build' and building_unlocked:
            self.world.build()
            sensible = 1
        if action == 'interact':
            player_town.holdings[0].interact(self.world.player)
            sensible = 1
        if action == 'exit game':
            self.run_game = False
        else:
            if sensible == 0:
                print('What?')

    def main_gameplay_loop(self):  # regex this
        while self.run_game:
            self.main_interface()


if __name__ == '__main__':
    utilities = Utilities()
    utilities.run_through_terminal = True
    game_interface = Game_interface(utilities)
    game_interface.main_gameplay_loop()
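# The action handling above uses a chain of independent `if` checks plus a `sensible` flag.
# An alternative (purely illustrative, using placeholder handlers rather than the game's real
# World/Town objects, and not part of the project code) is a dispatch dictionary that maps the
# typed command to a callable:
def dispatch_action_sketch(action, handlers):
    """Call the handler registered for `action`; report unknown commands."""
    handler = handlers.get(action)
    if handler is None:
        print('What?')
        return False
    handler()
    return True

# Example usage with a placeholder handler:
# dispatch_action_sketch('travel', {'travel': lambda: print('travelling...')})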
    def get_all_commit_details(self, commit_list=None):
        """
        Utility method used to get commit details from GitHub for a project.
        """
        data_frame_from_git_command = pd.DataFrame(
            columns=["COMMIT_ID", "COMMIT_MESSAGE", "AUTHOR_NAME", "AUTHOR_EMAIL", "AUTHOR_TIMESTAMP",
                     "COMMITTER_NAME", "COMMITTER_EMAIL", "COMMITTER_TIMESTAMP", "FILE_NAME", "FILE_STATUS",
                     "LINES_ADDED", "LINES_MODIFIED", "LINES_DELETED", "NF", "ND", "FILES_ENTROPY", "FILE_URL"])

        commit_file_path = f"{CDPConfigValues.cdp_dump_path}/{self.project_name}"
        project_path = f"{CDPConfigValues.local_git_repo}/{self.project_name}"
        CDPConfigValues.create_directory(project_path)

        git_command_csv_data = f"{CDPConfigValues.git_command_csv_data_path}/{self.project_name}"
        CDPConfigValues.create_directory(git_command_csv_data)
        CDPConfigValues.create_directory(f"{CDPConfigValues.git_command_log_path}/{self.project_name}")
        CDPConfigValues.create_directory(f"{CDPConfigValues.git_status_log_path}/{self.project_name}")
        CDPConfigValues.create_directory(f"{CDPConfigValues.git_stats_log_path}/{self.project_name}")

        start_time = time.time()

        if commit_list is None:
            commit_git_api_df = pd.read_csv(f"{commit_file_path}/{CDPConfigValues.commit_ids_file_name}")
            commit_git_api_df = commit_git_api_df["Commit_Ids"]
            commit_git_api_df = commit_git_api_df.drop_duplicates()
            commit_list = commit_git_api_df.to_list()
            print(f"Commit Ids to be Fetched {commit_list}")

        failed_commit_ids = commit_list

        if len(commit_list) != 0:
            loop_counter = 0
            # Retry failed commit ids in batches, up to 10 passes.
            while len(failed_commit_ids) != 0 and loop_counter < 10:
                loop_counter = loop_counter + 1
                commit_batches = list(
                    Utilities.create_batches(failed_commit_ids,
                                             batch_size=CDPConfigValues.git_command_execute_batch_size))
                total_batches = len(commit_batches)
                batch_counter, percent = 0, 0
                failed_commit_ids = list()
                print(f"Pre-processing Batch size {CDPConfigValues.git_command_execute_batch_size}")
                print(f"Total Batches to be executed is {total_batches}")

                for batch in commit_batches:
                    try:
                        loop = asyncio.new_event_loop()
                        # loop = asyncio.ProactorEventLoop()
                        asyncio.set_event_loop(loop)

                        if (total_batches * percent) // 100 == batch_counter:
                            print(f"Total Batches completed is {batch_counter} "
                                  f"and Failed batches Count is {len(failed_commit_ids)}")
                            percent = percent + 10

                        results_list = loop.run_until_complete(self.get_all_commits_async(batch))

                        for result in results_list:
                            data_frame_from_git_command = pd.concat([data_frame_from_git_command, result],
                                                                    ignore_index=True)
                    except Exception as e:
                        print(f"Exception Occurred in get_all_commits !!!\n"
                              f"{''.join(traceback.format_tb(e.__traceback__))}")
                        for commit_id in batch:
                            failed_commit_ids.append(commit_id)

                    batch_counter = batch_counter + 1

        end_time = time.time()
        print(f"Time Taken Dev Experience {end_time - start_time}")

        # Normalise author/committer timestamps to UTC strings and strip the trailing "+00:00" offset.
        data_frame_from_git_command['AUTHOR_TIMESTAMP'] = data_frame_from_git_command["AUTHOR_TIMESTAMP"].apply(
            lambda x: pd.Timestamp(x, tz="UTC"))
        data_frame_from_git_command['AUTHOR_TIMESTAMP'] = data_frame_from_git_command['AUTHOR_TIMESTAMP'].astype(str)
        data_frame_from_git_command['AUTHOR_TIMESTAMP'] = data_frame_from_git_command['AUTHOR_TIMESTAMP'].apply(
            lambda x: x[:-6])

        data_frame_from_git_command["COMMITTER_TIMESTAMP"] = data_frame_from_git_command["COMMITTER_TIMESTAMP"].apply(
            lambda x: pd.Timestamp(x, tz="UTC"))
        data_frame_from_git_command['COMMITTER_TIMESTAMP'] = data_frame_from_git_command[
            'COMMITTER_TIMESTAMP'].astype(str)
        data_frame_from_git_command['COMMITTER_TIMESTAMP'] = data_frame_from_git_command['COMMITTER_TIMESTAMP'].apply(
            lambda x: x[:-6])

        return data_frame_from_git_command
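# The method above (and the async fetchers below) rely on Utilities.create_batches to split an
# id/url list into fixed-size chunks. The helper below is only a minimal sketch of what that
# utility is assumed to do; the real Utilities implementation is not shown in this excerpt.
def create_batches_sketch(items, batch_size):
    """Yield consecutive slices of `items`, each at most `batch_size` long."""
    for start in range(0, len(items), batch_size):
        yield items[start:start + batch_size]

# Example: list(create_batches_sketch([1, 2, 3, 4, 5], batch_size=2)) -> [[1, 2], [3, 4], [5]]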
    def get_async_file_size(self, url_list, github_data_dump_df, web_constant, batch_size=4):
        CDPConfigValues.create_directory(
            f"{CDPConfigValues.preprocessed_file_path}/{web_constant.project_name}/file_size/")
        github_data_dump_df = github_data_dump_df.sort_values(by=["COMMITTER_TIMESTAMP"], ascending=[True])
        results, failed_url_list = list(), list()
        previous_commit_id = ""
        batch_counter = 0
        batches = list(Utilities.create_batches(url_list, batch_size))
        total_batches = len(batches)
        print(f"Total Batches to be executed is {total_batches}")
        percent = 0
        batch_timer = time.time()
        request_counter = 0
        previous_file_df = pd.DataFrame()

        for batch in batches:
            try:
                request_counter = request_counter + len(batch)
                current_time = time.time()
                time.sleep(1)
                # Throttle: once more than 200 requests have been sent, wait out the rest of the
                # 60-second window before dispatching the next batch.
                while request_counter > 200 and (current_time - batch_timer) < 60:
                    current_time = time.time()
                    time.sleep(1)
                else:
                    if (current_time - batch_timer) > 60:
                        batch_timer = time.time()
                        request_counter = 0

                loop = asyncio.get_event_loop()
                # loop = asyncio.ProactorEventLoop()
                asyncio.set_event_loop(loop)
                self.header = web_constant.fetch_header()
                self.proxy = web_constant.fetch_proxy()

                if (total_batches * percent) // 100 == batch_counter:
                    print(f"Total Batches completed is {batch_counter} "
                          f"and Failed Urls Count is {len(failed_url_list)}")
                    percent = percent + 10

                batch_counter = batch_counter + 1
                results_list = loop.run_until_complete(self.fetch_all_wrt_index(batch, loop))
                results_list.sort()

                for result in results_list:
                    if isinstance(result, Exception):
                        url = str(result).split(',')[0].split("url:")[1].replace("'", "")
                        print(f"Exception : {result}\n {url}")
                        failed_url_list.append(url)
                    elif result is None:
                        raise Exception(batch)
                    elif result[1] is None or result[1] == '[]':
                        print(f"File Size Json is Empty {result[0]} - {result[1]}")
                    elif '"message":"API rate limit exceeded for user ID' in result[1] or \
                            '"message":"API rate limit exceeded for user ID' in result or \
                            '"message": "Server Error"' in result[1]:
                        failed_url_list.append(result[0])
                    else:
                        if "Server Error" in result[1]:
                            print(f"url : {result[0]} and result -- {result[1]}")
                            failed_url_list.append(result[0])
                        else:
                            commit_id = result[0].split('?')[0].split('/')[-1]
                            tree = json.loads(result[1]).get('tree', None)
                            file_size_df = pd.DataFrame.from_dict(tree)
                            file_list = github_data_dump_df.loc[github_data_dump_df["COMMIT_ID"] == commit_id][
                                "FILE_NAME"].drop_duplicates().to_list()

                            for file_name in file_list:
                                values = file_size_df.loc[file_size_df["path"] == file_name]["size"].values
                                if len(values) == 1:
                                    file_size = int(values[0])
                                    results.append((commit_id, file_name, file_size))
                                elif len(values) == 0:
                                    # File not present in this commit's tree; fall back to the cached tree
                                    # listing of the previous commit (by committer timestamp), else size 0.
                                    if previous_file_df is None:
                                        timestamp = github_data_dump_df.loc[
                                            github_data_dump_df["COMMIT_ID"] == commit_id][
                                            "COMMITTER_TIMESTAMP"].to_list()[0]
                                        timestamp = github_data_dump_df.loc[
                                            github_data_dump_df["COMMITTER_TIMESTAMP"] < timestamp][
                                            "COMMITTER_TIMESTAMP"].to_list()[-1]
                                        commit = github_data_dump_df.loc[
                                            github_data_dump_df["COMMITTER_TIMESTAMP"] == timestamp][
                                            "COMMIT_ID"].values[0]
                                        if os.path.exists(
                                                f"{CDPConfigValues.preprocessed_file_path}/"
                                                f"{web_constant.project_name}/file_size/{commit}.csv"):
                                            previous_file_df = pd.read_csv(
                                                f"{CDPConfigValues.preprocessed_file_path}/"
                                                f"{web_constant.project_name}/file_size/{commit}.csv")

                                    if previous_file_df is None or len(previous_file_df) == 0:
                                        results.append((commit_id, file_name, 0))
                                    else:
                                        values = previous_file_df.loc[previous_file_df["path"] == file_name][
                                            "size"].values
                                        if len(values) == 0:
                                            results.append((commit_id, file_name, 0))
                                        else:
                                            file_size = int(values[0])
                                            results.append((commit_id, file_name, file_size))
                                elif len(values) > 1:
                                    results.append((commit_id, file_name, values[0]))
                                    print(f"file_name -- {file_name}, values -- {values}")

                            previous_file_df = file_size_df
                            previous_commit_id = commit_id
            except Exception as e:
                # Persist the last successfully parsed tree listing so a re-run can pick it up.
                previous_file_df.to_csv(
                    f"{CDPConfigValues.preprocessed_file_path}/{web_constant.project_name}/"
                    f"file_size/{previous_commit_id}.csv",
                    index=False)
                for url in batch:
                    failed_url_list.append(url)
                print(f"Exception Occurred for batch {batch_counter}!!! Execution is halted for 2 Seconds")
                print(f"Exception Occurred!!!\n{''.join(traceback.format_tb(e.__traceback__))}")
                time.sleep(2)

        failed_url_list = list(set(failed_url_list))
        print(f"Total Url's Requested is {len(url_list)}")
        print(f"Total Url's Failed is {len(failed_url_list)}")
        async_result = (results, failed_url_list)
        return async_result
    def get_async_data_using_asyncio(self, url_list, web_constant, batch_size=4):
        results, failed_url_list, exception_list = list(), list(), list()
        batch_counter, percent = 0, 0
        batches = list(Utilities.create_batches(url_list, batch_size))
        total_batches = len(batches)
        print(f"Total Batches to be executed is {total_batches}")
        batch_timer = time.time()
        request_counter = 0

        for batch in batches:
            try:
                request_counter = request_counter + len(batch)
                current_time = time.time()
                time.sleep(1)
                # Throttle: once more than 160 requests have been sent, wait out the rest of the
                # 60-second window before dispatching the next batch.
                while request_counter > 160 and (current_time - batch_timer) < 60:
                    current_time = time.time()
                    time.sleep(2)
                else:
                    if (current_time - batch_timer) > 60:
                        batch_timer = time.time()
                        request_counter = 0

                loop = asyncio.get_event_loop()
                asyncio.set_event_loop(loop)
                self.header = web_constant.fetch_header()
                self.proxy = web_constant.fetch_proxy()

                if (total_batches * percent) // 100 == batch_counter:
                    print(f"Total Batches completed is {batch_counter} "
                          f"and Failed Urls Count is {len(failed_url_list)}")
                    percent = percent + 10

                results_list = loop.run_until_complete(self.fetch_all(batch, loop))

                for result in results_list:
                    if isinstance(result, Exception):
                        print(f"Exception : {result}")
                        failed_url_list.append(str(result).split(',')[0].split("url:")[1].replace("'", ""))
                    elif result is None:
                        raise Exception(batch)
                    elif result is not None and (result[1] is None or result[1] == '[]'):
                        print(f"No Commit Details found for commit Id {result[0]} - {result[1]}")
                    elif '"message":"API rate limit exceeded for user ID' in result[1] or \
                            '"message":"API rate limit exceeded for user ID' in result or \
                            '"message": "Server Error"' in result[1]:
                        failed_url_list.append(result[0])
                    else:
                        if "Server Error" in result[1]:
                            print(f"url : {result[0]} and result -- {result[1]}")
                            failed_url_list.append(result[0])
                        else:
                            url = result[0].split("/events")[0].split("/")[-1]
                            results.append((url, result[1]))

                batch_counter = batch_counter + 1
            except Exception as e:
                for url in batch:
                    failed_url_list.append(url)
                exception_list.append(e)
                print(f"Exception Occurred for batch {batch_counter}!!! Execution is halted for 2 Seconds")
                time.sleep(2)

        print(exception_list)
        if len(failed_url_list) > 0:
            failed_url_list = list(set(failed_url_list))
        print(f"Total Url's Requested is {len(url_list)}")
        print(f"Total Url's Failed is {len(failed_url_list)}")
        async_result = (results, failed_url_list)
        return async_result
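# Both async fetch methods above throttle themselves by counting requests inside a rolling
# 60-second window (200 and 160 requests per minute respectively) before dispatching the next
# batch. The standalone function below is only a simplified sketch of that idea; the name, the
# default limit, and the return convention are illustrative and not taken from the project code.
import time


def wait_for_request_window(request_counter, window_start, limit=160, window_seconds=60):
    """Block until fewer than `limit` requests have been sent in the current window.

    Returns the (possibly reset) request counter and window start time.
    """
    while request_counter > limit and (time.time() - window_start) < window_seconds:
        time.sleep(1)  # wait out the remainder of the window
    if (time.time() - window_start) > window_seconds:
        return 0, time.time()  # window elapsed: reset the counter and start a new window
    return request_counter, window_start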
    def get_event_data(self):
        """ internal method used for getting list of events for closed bugs. """
        if os.path.exists(f"{self.cdp_dump_path}/{CDPConfigValues.project_issue_list_file_name}"):
            self.bug_data_frame = pd.read_csv(
                f"{self.cdp_dump_path}/{CDPConfigValues.project_issue_list_file_name}")
        else:
            self.bug_data_frame = self.get_bug_data()

        self.closed_bug_data_frame = self.bug_data_frame[self.bug_data_frame['STATE'] == 'closed']
        self.closed_bug_data_frame = self.closed_bug_data_frame.reset_index()
        self.event_data_frame = self.closed_bug_data_frame[["ISSUE_ID", "CREATED_TIMESTAMP", "UPDATED_TIMESTAMP"]]

        """Fetch the Bug Id's from the data frame"""
        list_of_issues = self.closed_bug_data_frame['ISSUE_ID'].tolist()

        """using the Bugs Id list create event url list"""
        url_list = Utilities.format_url(self.event_url, list_of_issues)

        start_time = time.time()
        results = self.web_connection.get_async_data_using_asyncio(
            url_list, self.web_constants, batch_size=CDPConfigValues.git_api_batch_size)
        list_of_buggy_commits = results[0]
        failed_urls = results[1]
        loop_counter = 1

        while len(failed_urls) > 0:
            time.sleep(60 * loop_counter)
            print(f"Total Failed URL's re-trying {len(failed_urls)}")
            results = self.web_connection.get_async_data_using_asyncio(
                failed_urls, self.web_constants, batch_size=CDPConfigValues.git_api_batch_size // 2)
            failed_urls = results[1]
            list_of_buggy_commits = list_of_buggy_commits + results[0]

        end_time = time.time()
        print("Parallel time taken to get all event data using (asyncio) =", end_time - start_time)

        list_of_buggy_commits = pd.DataFrame(list_of_buggy_commits, columns=["ISSUE_ID", "JSON_RESPONSE"])
        list_of_buggy_commits['ISSUE_ID'] = list_of_buggy_commits['ISSUE_ID'].astype(str)
        self.event_data_frame['ISSUE_ID'] = self.event_data_frame['ISSUE_ID'].astype(str)

        self.event_data_frame = pd.merge(self.event_data_frame, list_of_buggy_commits, how="left",
                                         left_on=["ISSUE_ID"], right_on=["ISSUE_ID"])

        self.event_data_frame.to_csv(f"{self.cdp_dump_path}/github_events_cdp_dump.csv",
                                     encoding='utf-8-sig', index=False)

        event_parser = EventsJsonParser()
        event_parser.find_buggy_commits_based_on_repository_fixes(
            self.web_constants, self.event_data_frame,
            f"{self.cdp_dump_path}/{CDPConfigValues.closed_events_list_file_name}")
    def get_events_data_for_scheduler(self, current_date, previous_bug_df, previous_closed_events_df):
        """
        internal method used for getting list of events for closed bugs.

        :param current_date: Today's date
        :type current_date: date
        :param previous_bug_df: Dataframe of bugs file that is created from get_bug_data function
        :type previous_bug_df: Pandas Dataframe
        :param previous_closed_events_df: Dataframe containing details of closed bugs with fix commit ids
        :type previous_closed_events_df: Pandas Dataframe
        :returns: event dataframe for latest events for bugs that happened after previous_bug_df
        :rtype: Pandas Dataframe
        """
        self.bug_data_frame = self.get_bug_data()
        self.closed_bug_data_frame = self.bug_data_frame[self.bug_data_frame['STATE'] == 'closed']
        self.closed_bug_data_frame = self.closed_bug_data_frame.reset_index()
        self.closed_bug_data_frame = self.closed_bug_data_frame[
            ~(self.closed_bug_data_frame.ISSUE_ID.isin(previous_bug_df.ISSUE_ID))]

        if len(self.closed_bug_data_frame) != 0:
            self.event_data_frame = self.closed_bug_data_frame[
                ["ISSUE_ID", "CREATED_TIMESTAMP", "UPDATED_TIMESTAMP"]]

            """Fetch the Bug Id's from the data frame"""
            list_of_issues = self.closed_bug_data_frame['ISSUE_ID'].tolist()

            """using the Bugs Id list create event url list"""
            url_list = Utilities.format_url(self.event_url, list_of_issues)

            start_time = time.time()
            results = self.web_connection.get_async_data_using_asyncio(
                url_list, self.web_constants, batch_size=CDPConfigValues.git_api_batch_size)
            list_of_buggy_commits = results[0]
            failed_urls = results[1]
            loop_counter = 1

            while len(failed_urls) > 0:
                time.sleep(60 * loop_counter)
                print(f"Total Failed URL's re-trying {len(failed_urls)}")
                results = self.web_connection.get_async_data_using_asyncio(
                    failed_urls, self.web_constants, CDPConfigValues.git_api_batch_size // 2)
                failed_urls = results[1]
                list_of_buggy_commits = list_of_buggy_commits + results[0]

            end_time = time.time()
            print("Parallel time taken to get all event data using (asyncio) =", end_time - start_time)

            list_of_buggy_commits = pd.DataFrame(list_of_buggy_commits, columns=["ISSUE_ID", "JSON_RESPONSE"])
            list_of_buggy_commits['ISSUE_ID'] = list_of_buggy_commits['ISSUE_ID'].astype(str)
            self.event_data_frame['ISSUE_ID'] = self.event_data_frame['ISSUE_ID'].astype(str)

            self.event_data_frame = pd.merge(self.event_data_frame, list_of_buggy_commits, how="left",
                                             left_on=["ISSUE_ID"], right_on=["ISSUE_ID"])

            self.event_data_frame.to_csv(
                f"{CDPConfigValues.schedule_file_path}/{self.project_name}/{current_date}/github_events_cdp_dump.csv",
                encoding='utf-8-sig', index=False)

            event_parser = EventsJsonParser()
            event_df = event_parser.find_buggy_commits_based_on_repository_fixes(
                self.web_constants, self.event_data_frame)
            event_df = pd.concat([event_df, previous_closed_events_df], ignore_index=True)

            return event_df
        else:
            return None
    def get_commit_details(self):
        """ method clones the project repository and collects commit data """
        start_time = time.time()

        if CDPConfigValues.use_git_command_to_fetch_commit_details:
            git_data = GitData(self.project)
            git_data.clone_project()

            print("Getting Commit ids using git commands...")
            commit_ids = git_data.get_all_commit_ids()
            commit_ids = list(set(commit_ids))
            commit_ids_data_frame = pd.DataFrame(commit_ids, columns=['Commit_Ids'])
            commit_ids_data_frame.to_csv(f"{self.cdp_dump_path}/{CDPConfigValues.commit_ids_file_name}", index=False)

            print("Getting Commit details using git commands...")
            self.commit_details = git_data.get_all_commit_details(commit_ids_data_frame['Commit_Ids'].to_list())
            self.commit_details.to_csv(f"{self.cdp_dump_path}/{CDPConfigValues.commit_details_file_name}",
                                       encoding='utf-8-sig', index=False)
        else:
            commit_data = self.get_commit_data_asyncio()
            commit_parser = CommitsJsonParser()
            commit_ids = commit_parser.parse_id_listing(commit_data)
            commit_ids = list(set(commit_ids))
            commit_ids_data_frame = pd.DataFrame(commit_ids, columns=['Commit_Ids'])
            commit_ids_data_frame.to_csv(f"{self.cdp_dump_path}/{CDPConfigValues.commit_ids_file_name}", index=False)

            print(f"Total Unique Commit IDs to be fetched is {len(commit_ids)}")
            url_list = Utilities.create_url(self.commit_details_url, commit_ids)

            results = self.web_connection.get_async_data_using_asyncio(
                url_list, self.web_constants, batch_size=CDPConfigValues.git_api_batch_size)
            commit_details = results[0]
            failed_urls = results[1]
            loop_counter = 1

            # Retry failed urls with exponential backoff, capped at 20 passes.
            while len(failed_urls) > 0 and loop_counter < 20:
                time.sleep(60 * pow(2, loop_counter))
                print(f"Total Failed URL's re-trying {len(failed_urls)}")
                results = self.web_connection.get_async_data_using_asyncio(
                    failed_urls, self.web_constants, CDPConfigValues.git_api_batch_size // 2)
                failed_urls = results[1]
                commit_details = commit_details + results[0]
                loop_counter = loop_counter + 1

            self.commit_details = commit_parser.parse_json(commit_details, self.cdp_dump_path)
            self.get_missing_files()

        end_time = time.time()
        print(f"Fetched all commit details in {end_time - start_time}")