Code example #1
File: Gui.py  Project: AaronFR/TradeGame-Python
    def __init__(self, parent):
        wx.Frame.__init__(self, parent, title="TradeGame")
        # Share a single Utilities instance between the frame and the game logic
        utilities = Utilities()
        self.utilities = utilities
        self.game_interface = Game_interface(utilities)

        self.run_gui()

        # Mirror the game's current text output into the GUI label
        self.content = self.game_interface.utilities.text_package
        self.content_text.SetLabel(self.content)
        self.targeted_function = None
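The constructor above wires a shared `Utilities` instance into both the frame and the game logic. For context, a minimal sketch of how such a frame is typically launched in wxPython (the `MainWindow` class name is hypothetical; the excerpt does not show the original class name):

import wx

class MainWindow(wx.Frame):
    """Hypothetical stand-in for the frame class in the excerpt."""
    def __init__(self, parent):
        wx.Frame.__init__(self, parent, title="TradeGame")

if __name__ == '__main__':
    app = wx.App(False)       # the App object must exist before any windows
    frame = MainWindow(None)  # a top-level frame has no parent
    frame.Show()
    app.MainLoop()            # hand control to the wx event loop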
Code example #2
        if action == 'trade':
            self.world.trade()
            sensible = 1
        if action == 'travel':
            self.world.travel()
            sensible = 1
        if action == 'build' and building_unlocked:
            self.world.build()
            sensible = 1
        if action == 'interact':
            player_town.holdings[0].interact(self.world.player)
            sensible = 1

        if action == 'exit game':
            self.run_game = False
        elif sensible == 0:
            # no handler matched the typed action
            print('What?')

    def main_gameplay_loop(self):
        # regex this
        while self.run_game:
            self.main_interface()


if __name__ == '__main__':
    utilities = Utilities()
    utilities.run_through_terminal = True

    game_interface = Game_interface(utilities)
    game_interface.main_gameplay_loop()
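The `if action == ...` chain above can also be phrased as a dictionary dispatch. A minimal sketch covering the zero-argument actions, assuming the same World interface as the snippet (`make_handlers` is a hypothetical helper, not part of the project):

def make_handlers(world, building_unlocked):
    """Map action names to zero-argument handlers, mirroring the if-chain above."""
    handlers = {
        'trade': world.trade,
        'travel': world.travel,
    }
    if building_unlocked:
        handlers['build'] = world.build
    return handlers

# usage sketch:
# handler = make_handlers(world, building_unlocked).get(action)
# if handler is not None:
#     handler()
# else:
#     print('What?')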
Code example #3
File: GitData.py  Project: zeromtmu/codedefectai
    def get_all_commit_details(self, commit_list=None):
        """
            Utility method used to get commit details from the github for a project.
        """

        data_frame_from_git_command = pd.DataFrame(columns=["COMMIT_ID", "COMMIT_MESSAGE",
                                                            "AUTHOR_NAME", "AUTHOR_EMAIL", "AUTHOR_TIMESTAMP",
                                                            "COMMITTER_NAME", "COMMITTER_EMAIL", "COMMITTER_TIMESTAMP",
                                                            "FILE_NAME", "FILE_STATUS",
                                                            "LINES_ADDED", "LINES_MODIFIED", "LINES_DELETED",
                                                            "NF", "ND", "FILES_ENTROPY", "FILE_URL"])

        commit_file_path = f"{CDPConfigValues.cdp_dump_path}/{self.project_name}"

        project_path = f"{CDPConfigValues.local_git_repo}/{self.project_name}"
        CDPConfigValues.create_directory(project_path)

        git_command_csv_data = f"{CDPConfigValues.git_command_csv_data_path}/{self.project_name}"

        CDPConfigValues.create_directory(git_command_csv_data)
        CDPConfigValues.create_directory(f"{CDPConfigValues.git_command_log_path}/{self.project_name}")
        CDPConfigValues.create_directory(f"{CDPConfigValues.git_status_log_path}/{self.project_name}")
        CDPConfigValues.create_directory(f"{CDPConfigValues.git_stats_log_path}/{self.project_name}")

        start_time = time.time()
        if commit_list is None:
            commit_git_api_df = pd.read_csv(f"{commit_file_path}/{CDPConfigValues.commit_ids_file_name}")

            commit_git_api_df = commit_git_api_df["Commit_Ids"]
            commit_git_api_df = commit_git_api_df.drop_duplicates()

            commit_list = commit_git_api_df.to_list()

        print(f"Commit Ids to be Fetched {commit_list}")
        failed_commit_ids = commit_list

        if len(commit_list) != 0:
            loop_counter = 0

            while len(failed_commit_ids) != 0 and loop_counter < 10:

                loop_counter = loop_counter + 1

                commit_batches = list(
                    Utilities.create_batches(failed_commit_ids,
                                             batch_size=CDPConfigValues.git_command_execute_batch_size))
                total_batches = len(commit_batches)
                batch_counter, percent = 0, 0
                failed_commit_ids = list()

                print(f"Pre-processing Batch size {CDPConfigValues.git_command_execute_batch_size}")
                print(f"Total Batches to be executed is {total_batches}")

                for batch in commit_batches:
                    try:
                        # Create a fresh event loop for each batch of commits
                        loop = asyncio.new_event_loop()
                        # loop = asyncio.ProactorEventLoop()
                        asyncio.set_event_loop(loop)

                        if (total_batches * percent) // 100 == batch_counter:
                            print(
                                f"Total Batches completed is {batch_counter} "
                                f"and Failed batches Count is {len(failed_commit_ids)}")
                            percent = percent + 10

                        results_list = loop.run_until_complete(
                            self.get_all_commits_async(batch))
                        for result in results_list:
                            data_frame_from_git_command = pd.concat([data_frame_from_git_command, result],
                                                                    ignore_index=True)

                    except Exception as e:
                        # format_tb returns the traceback as a list of strings;
                        # print_tb prints directly and would embed None in the f-string
                        print(f"Exception Occurred in get_all_commits !!!\n{''.join(traceback.format_tb(e.__traceback__))}")
                        for commit_id in batch:
                            failed_commit_ids.append(commit_id)

                    batch_counter = batch_counter + 1

            end_time = time.time()
            print(f"Time Taken Dev Experience {end_time - start_time}")

            # Normalise both timestamp columns: localise to UTC, render as a
            # string, then strip the trailing "+00:00" offset (last 6 characters)
            data_frame_from_git_command['AUTHOR_TIMESTAMP'] = data_frame_from_git_command["AUTHOR_TIMESTAMP"].apply(
                lambda x: pd.Timestamp(x, tz="UTC"))

            data_frame_from_git_command['AUTHOR_TIMESTAMP'] = data_frame_from_git_command['AUTHOR_TIMESTAMP'].astype(
                str)

            data_frame_from_git_command['AUTHOR_TIMESTAMP'] = data_frame_from_git_command['AUTHOR_TIMESTAMP'].apply(
                lambda x: x[:-6])

            data_frame_from_git_command["COMMITTER_TIMESTAMP"] = data_frame_from_git_command[
                "COMMITTER_TIMESTAMP"].apply(
                lambda x: pd.Timestamp(x, tz="UTC"))

            data_frame_from_git_command['COMMITTER_TIMESTAMP'] = data_frame_from_git_command[
                'COMMITTER_TIMESTAMP'].astype(
                str)

            data_frame_from_git_command['COMMITTER_TIMESTAMP'] = data_frame_from_git_command[
                'COMMITTER_TIMESTAMP'].apply(
                lambda x: x[:-6])

        return data_frame_from_git_command
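Stripped of the project specifics, the loop above follows a common shape: process batches, collect the failures, and re-batch them for a bounded number of passes. A generic sketch of that pattern (names are illustrative, not from the project):

def run_in_batches(items, process_batch, batch_size=10, max_passes=10):
    """Process items in batches, re-queuing failed batches for up to max_passes."""
    pending = list(items)
    results = []
    for _ in range(max_passes):
        if not pending:
            break
        failed = []
        for i in range(0, len(pending), batch_size):
            batch = pending[i:i + batch_size]
            try:
                results.extend(process_batch(batch))
            except Exception:
                failed.extend(batch)  # retry the whole batch on the next pass
        pending = failed
    return results, pending  # pending holds items that never succeeded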
Code example #4
    def get_async_file_size(self, url_list, github_data_dump_df, web_constant, batch_size=4):

        CDPConfigValues.create_directory(
            f"{CDPConfigValues.preprocessed_file_path}/{web_constant.project_name}/file_size/")
        github_data_dump_df = github_data_dump_df.sort_values(by=["COMMITTER_TIMESTAMP"], ascending=[True])

        results, failed_url_list = list(), list()
        previous_commit_id = ""
        batch_counter = 0
        batches = list(Utilities.create_batches(url_list, batch_size))
        total_batches = len(batches)
        print(f"Total Batches to be executed is {total_batches}")
        percent = 0
        batch_timer = time.time()
        request_counter = 0
        previous_file_df = pd.DataFrame()
        for batch in batches:
            try:
                request_counter = request_counter + len(batch)
                current_time = time.time()
                time.sleep(1)
                # Fixed-window throttle: pause while more than 200 requests have
                # been issued inside the current 60-second window
                while request_counter > 200 and (current_time - batch_timer) < 60:
                    current_time = time.time()
                    time.sleep(1)
                else:
                    if (current_time - batch_timer) > 60:
                        # Window elapsed: reset the counter and restart the timer
                        batch_timer = time.time()
                        request_counter = 0

                loop = asyncio.get_event_loop()
                # loop = asyncio.ProactorEventLoop()
                asyncio.set_event_loop(loop)
                self.header = web_constant.fetch_header()
                self.proxy = web_constant.fetch_proxy()

                if (total_batches * percent) // 100 == batch_counter:
                    print(f"Total Batches completed is {batch_counter} and Failed Urls Count is {len(failed_url_list)}")
                    percent = percent + 10

                batch_counter = batch_counter + 1

                results_list = loop.run_until_complete(self.fetch_all_wrt_index(batch, loop))
                results_list.sort()
                for result in results_list:
                    if isinstance(result, Exception):
                        url = str(result).split(',')[0].split("url:")[1].replace("'", "")
                        print(f"Exception : {result}\n {url}")
                        failed_url_list.append(url)
                    elif result is None:
                        raise Exception(batch)
                    elif result[1] is None or result[1] == '[]':
                        print(f"File Size Json is Empty {result[0]} - {result[1]}")
                    elif '"message":"API rate limit exceeded for user ID' in result[1] or \
                            '"message":"API rate limit exceeded for user ID' in result or \
                            '"message": "Server Error"' in result[1]:
                        failed_url_list.append(result[0])
                    else:
                        if "Server Error" in result[1]:
                            print(f"url : {result[0]} and result -- {result[1]}")
                            failed_url_list.append(result[0])
                        else:
                            commit_id = result[0].split('?')[0].split('/')[-1]
                            tree = json.loads(result[1]).get('tree', None)
                            file_size_df = pd.DataFrame.from_dict(tree)
                            file_list = github_data_dump_df.loc[github_data_dump_df["COMMIT_ID"] == commit_id][
                                "FILE_NAME"].drop_duplicates().to_list()

                            for file_name in file_list:
                                values = file_size_df.loc[file_size_df["path"] == file_name]["size"].values
                                if len(values) == 1:
                                    file_size = int(values[0])
                                    results.append((commit_id, file_name, file_size))

                                elif len(values) == 0:
                                    # File absent from this commit's tree: fall back to
                                    # the previous commit's cached file sizes. The frame
                                    # starts out empty, so test emptiness rather than None.
                                    if previous_file_df is None or previous_file_df.empty:
                                        timestamp = \
                                            github_data_dump_df.loc[github_data_dump_df["COMMIT_ID"] == commit_id][
                                                "COMMITTER_TIMESTAMP"].to_list()[0]
                                        timestamp = \
                                            github_data_dump_df.loc[
                                                github_data_dump_df["COMMITTER_TIMESTAMP"] < timestamp][
                                                "COMMITTER_TIMESTAMP"].to_list()[-1]
                                        commit = \
                                            github_data_dump_df.loc[
                                                github_data_dump_df["COMMITTER_TIMESTAMP"] == timestamp][
                                                "COMMIT_ID"].values[0]

                                        if os.path.exists(
                                                f"{CDPConfigValues.preprocessed_file_path}/{web_constant.project_name}/file_size/{commit}.csv"):
                                            previous_file_df = pd.read_csv(
                                                f"{CDPConfigValues.preprocessed_file_path}/{web_constant.project_name}/file_size/{commit}.csv")

                                    if previous_file_df is None or len(previous_file_df) == 0:
                                        results.append((commit_id, file_name, 0))
                                    else:
                                        values = previous_file_df.loc[previous_file_df["path"] == file_name][
                                            "size"].values
                                        if len(values) == 0:
                                            results.append((commit_id, file_name, 0))
                                        else:
                                            file_size = int(values[0])
                                            results.append((commit_id, file_name, file_size))

                                elif len(values) > 1:
                                    # Multiple tree entries matched the path; take the first
                                    results.append((commit_id, file_name, values[0]))
                                    print(f"file_name -- {file_name}, values -- {values}")

                                previous_file_df = file_size_df
                                previous_commit_id = commit_id

            except Exception as e:
                # Persist the last known file sizes so a re-run can resume from them
                previous_file_df.to_csv(
                    f"{CDPConfigValues.preprocessed_file_path}/{web_constant.project_name}/file_size/{previous_commit_id}.csv", index=False)
                for url in batch:
                    failed_url_list.append(url)
                print(f"Exception Occurred for batch {batch_counter}!!! Execution is halted for 2 Seconds")
                print(f"Exception Occurred!!!\n{''.join(traceback.format_tb(e.__traceback__))}")
                time.sleep(2)

        failed_url_list = list(set(failed_url_list))

        print(f"Total Url's Requested is {len(url_list)}")
        print(f"Total Url's Failed is {len(failed_url_list)}")

        async_result = (results, failed_url_list)
        return async_result
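The `request_counter`/`batch_timer` bookkeeping above amounts to a fixed-window rate limiter: at most roughly 200 requests per 60-second window. The same idea in isolation (a sketch, not code from the project):

import time

class FixedWindowLimiter:
    """Block when more than max_requests have been issued in the current window."""

    def __init__(self, max_requests=200, window_seconds=60):
        self.max_requests = max_requests
        self.window_seconds = window_seconds
        self.window_start = time.time()
        self.count = 0

    def acquire(self, n=1):
        self.count += n
        while self.count > self.max_requests:
            if time.time() - self.window_start > self.window_seconds:
                # Window elapsed: reset the counter and start a new window
                self.window_start = time.time()
                self.count = n
            else:
                time.sleep(1)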
Code example #5
    def get_async_data_using_asyncio(self, url_list, web_constant, batch_size=4):

        results, failed_url_list, exception_list = list(), list(), list()
        batch_counter, percent = 0, 0
        batches = list(Utilities.create_batches(url_list, batch_size))
        total_batches = len(batches)
        print(f"Total Batches to be executed is {total_batches}")
        batch_timer = time.time()
        request_counter = 0
        for batch in batches:
            try:
                request_counter = request_counter + len(batch)
                current_time = time.time()
                time.sleep(1)
                # Fixed-window throttle: pause while more than 160 requests have
                # been issued inside the current 60-second window
                while request_counter > 160 and (current_time - batch_timer) < 60:
                    current_time = time.time()
                    time.sleep(2)
                else:
                    if (current_time - batch_timer) > 60:
                        # Window elapsed: reset the counter and restart the timer
                        batch_timer = time.time()
                        request_counter = 0

                loop = asyncio.get_event_loop()
                asyncio.set_event_loop(loop)

                self.header = web_constant.fetch_header()
                self.proxy = web_constant.fetch_proxy()
                if (total_batches * percent) // 100 == batch_counter:
                    print(f"Total Batches completed is {batch_counter} and Failed Urls Count is {len(failed_url_list)}")
                    percent = percent + 10

                results_list = loop.run_until_complete(self.fetch_all(batch, loop))
                for result in results_list:
                    if isinstance(result, Exception):
                        print(f"Exception : {result}")
                        failed_url_list.append(str(result).split(',')[0].split("url:")[1].replace("'", ""))
                    elif result is None:
                        raise Exception(batch)
                    elif result[1] is None or result[1] == '[]':
                        print(f"No Commit Details found for commit Id {result[0]} - {result[1]}")
                    elif '"message":"API rate limit exceeded for user ID' in result[1] or \
                            '"message":"API rate limit exceeded for user ID' in result or \
                            '"message": "Server Error"' in result[1]:
                        failed_url_list.append(result[0])
                    else:
                        if "Server Error" in result[1]:
                            print(f"url : {result[0]} and result -- {result[1]}")
                            failed_url_list.append(result[0])
                        else:
                            url = result[0].split("/events")[0].split("/")[-1]
                            results.append((url, result[1]))
                batch_counter = batch_counter + 1
            except Exception as e:
                for url in batch:
                    failed_url_list.append(url)
                exception_list.append(e)
                print(f"Exception Occurred for batch {batch_counter}!!! Execution is halted for 2 Seconds")
                time.sleep(2)

        print(exception_list)
        if len(failed_url_list) > 0:
            failed_url_list = list(set(failed_url_list))

        print(f"Total Url's Requested is {len(url_list)}")
        print(f"Total Url's Failed is {len(failed_url_list)}")

        async_result = (results, failed_url_list)
        return async_result
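A note on the `while ... else` construct both methods above use for throttling: in Python, a loop's `else` block runs when the loop exits without hitting `break`. These throttle loops contain no `break`, so the `else` always runs and is equivalent to code placed straight after the loop:

n = 0
while n < 3:
    n += 1
else:
    print("reached: the loop ended without break")

# equivalent, since the loop never breaks:
n = 0
while n < 3:
    n += 1
print("reached: the loop ended without break")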
Code example #6
    def get_event_data(self):
        """
        Internal method used to get the list of events for closed bugs.
        """

        if os.path.exists(
                f"{self.cdp_dump_path}/{CDPConfigValues.project_issue_list_file_name}"
        ):
            self.bug_data_frame = pd.read_csv(
                f"{self.cdp_dump_path}/{CDPConfigValues.project_issue_list_file_name}"
            )
        else:
            self.bug_data_frame = self.get_bug_data()
        self.closed_bug_data_frame = self.bug_data_frame[
            self.bug_data_frame['STATE'] == 'closed']
        self.closed_bug_data_frame = self.closed_bug_data_frame.reset_index()

        self.event_data_frame = self.closed_bug_data_frame[[
            "ISSUE_ID", "CREATED_TIMESTAMP", "UPDATED_TIMESTAMP"
        ]]
        """Fetch the Bug Id's from the data frame"""
        list_of_issues = self.closed_bug_data_frame['ISSUE_ID'].tolist()
        """using the Bugs Id list create event url list"""
        url_list = Utilities.format_url(self.event_url, list_of_issues)
        start_time = time.time()

        results = self.web_connection.get_async_data_using_asyncio(
            url_list,
            self.web_constants,
            batch_size=CDPConfigValues.git_api_batch_size)

        list_of_buggy_commits = results[0]
        failed_urls = results[1]
        loop_counter = 1

        while len(failed_urls) > 0:
            # Linear backoff: wait one minute longer on each retry pass
            time.sleep(60 * loop_counter)
            print(f"Retrying {len(failed_urls)} failed URLs")
            results = self.web_connection.get_async_data_using_asyncio(
                failed_urls,
                self.web_constants,
                batch_size=CDPConfigValues.git_api_batch_size // 2)
            failed_urls = results[1]
            list_of_buggy_commits = list_of_buggy_commits + results[0]
            loop_counter = loop_counter + 1
        end_time = time.time()
        print("Parallel time taken to get all event data using (asyncio) =",
              end_time - start_time)

        list_of_buggy_commits = pd.DataFrame(
            list_of_buggy_commits, columns=["ISSUE_ID", "JSON_RESPONSE"])
        list_of_buggy_commits['ISSUE_ID'] = list_of_buggy_commits[
            'ISSUE_ID'].astype(str)
        self.event_data_frame['ISSUE_ID'] = self.event_data_frame[
            'ISSUE_ID'].astype(str)
        self.event_data_frame = pd.merge(self.event_data_frame,
                                         list_of_buggy_commits,
                                         how="left",
                                         left_on=["ISSUE_ID"],
                                         right_on=["ISSUE_ID"])

        self.event_data_frame.to_csv(
            f"{self.cdp_dump_path}/github_events_cdp_dump.csv",
            encoding='utf-8-sig',
            index=False)
        event_parser = EventsJsonParser()
        event_parser.find_buggy_commits_based_on_repository_fixes(
            self.web_constants, self.event_data_frame, f"{self.cdp_dump_path}/"
            f"{CDPConfigValues.closed_events_list_file_name}")
Code example #7
    def get_events_data_for_scheduler(self, current_date, previous_bug_df,
                                      previous_closed_events_df):
        """
        internal method used for getting list of events for closed bugs.
        
        :param current_date: Today's date
        :type current_date: date
        :param previous_bug_df: Dataframe of bugs file that is created from get_bug_data function
        :type previous_bug_df: Pandas Dataframe
        :param previous_closed_events_df: Dataframe containing details of closed bugs with fix commit ids
        :type previous_closed_events_df: Pandas Dataframe
        :returns: event dataframe for latest events for bugs that happened after previous_bug_df
        :rtype: Pandas Dataframe
        """
        self.bug_data_frame = self.get_bug_data()
        self.closed_bug_data_frame = self.bug_data_frame[
            self.bug_data_frame['STATE'] == 'closed']

        self.closed_bug_data_frame = self.closed_bug_data_frame.reset_index()

        # Keep only issues that were not present in the previous run
        self.closed_bug_data_frame = self.closed_bug_data_frame[~(
            self.closed_bug_data_frame.ISSUE_ID.isin(previous_bug_df.ISSUE_ID)
        )]

        if len(self.closed_bug_data_frame) != 0:
            self.event_data_frame = self.closed_bug_data_frame[[
                "ISSUE_ID", "CREATED_TIMESTAMP", "UPDATED_TIMESTAMP"
            ]]
            """Fetch the Bug Id's from the data frame"""
            list_of_issues = self.closed_bug_data_frame['ISSUE_ID'].tolist()
            """using the Bugs Id list create event url list"""
            url_list = Utilities.format_url(self.event_url, list_of_issues)
            start_time = time.time()

            results = self.web_connection.get_async_data_using_asyncio(
                url_list,
                self.web_constants,
                batch_size=CDPConfigValues.git_api_batch_size)

            list_of_buggy_commits = results[0]
            failed_urls = results[1]
            loop_counter = 1

            while len(failed_urls) > 0:
                # Linear backoff: wait one minute longer on each retry pass
                time.sleep(60 * loop_counter)
                print(f"Retrying {len(failed_urls)} failed URLs")
                results = self.web_connection.get_async_data_using_asyncio(
                    failed_urls, self.web_constants,
                    CDPConfigValues.git_api_batch_size // 2)
                failed_urls = results[1]
                list_of_buggy_commits = list_of_buggy_commits + results[0]
                loop_counter = loop_counter + 1

            end_time = time.time()
            print(
                "Parallel time taken to get all event data using (asyncio) =",
                end_time - start_time)

            list_of_buggy_commits = pd.DataFrame(
                list_of_buggy_commits, columns=["ISSUE_ID", "JSON_RESPONSE"])
            list_of_buggy_commits['ISSUE_ID'] = list_of_buggy_commits[
                'ISSUE_ID'].astype(str)
            self.event_data_frame['ISSUE_ID'] = self.event_data_frame[
                'ISSUE_ID'].astype(str)
            self.event_data_frame = pd.merge(self.event_data_frame,
                                             list_of_buggy_commits,
                                             how="left",
                                             left_on=["ISSUE_ID"],
                                             right_on=["ISSUE_ID"])

            self.event_data_frame.to_csv(
                f"{CDPConfigValues.schedule_file_path}/{self.project_name}/{current_date}/github_events_cdp_dump.csv",
                encoding='utf-8-sig',
                index=False)
            event_parser = EventsJsonParser()
            event_df = event_parser.find_buggy_commits_based_on_repository_fixes(
                self.web_constants, self.event_data_frame)

            event_df = pd.concat([event_df, previous_closed_events_df],
                                 ignore_index=True)

            return event_df

        else:
            return None
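The scheduler variant only fetches events for issues that were absent from the previous run, using `~Series.isin` as a set difference on the `ISSUE_ID` column. A self-contained example of that filter (data values are made up):

import pandas as pd

previous = pd.DataFrame({"ISSUE_ID": [101, 102]})
current = pd.DataFrame({"ISSUE_ID": [101, 102, 103, 104], "STATE": ["closed"] * 4})

# Keep only rows whose ISSUE_ID did not appear in the previous snapshot
new_only = current[~current.ISSUE_ID.isin(previous.ISSUE_ID)]
print(new_only.ISSUE_ID.tolist())  # [103, 104]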
Code example #8
    def get_commit_details(self):
        """
        method clones the project repository and collects commit data
        """
        start_time = time.time()
        if CDPConfigValues.use_git_command_to_fetch_commit_details:

            git_data = GitData(self.project)
            git_data.clone_project()

            print("Getting Commit ids using git commands...")
            commit_ids = git_data.get_all_commit_ids()
            commit_ids = list(set(commit_ids))
            commit_ids_data_frame = pd.DataFrame(commit_ids,
                                                 columns=['Commit_Ids'])
            commit_ids_data_frame.to_csv(
                f"{self.cdp_dump_path}/{CDPConfigValues.commit_ids_file_name}",
                index=False)

            print("Getting Commit details using git commands...")
            self.commit_details = git_data.get_all_commit_details(
                commit_ids_data_frame['Commit_Ids'].to_list())

            self.commit_details.to_csv(
                f"{self.cdp_dump_path}/{CDPConfigValues.commit_details_file_name}",
                encoding='utf-8-sig',
                index=False)
        else:
            commit_data = self.get_commit_data_asyncio()
            commit_parser = CommitsJsonParser()
            commit_ids = commit_parser.parse_id_listing(commit_data)

            commit_ids = list(set(commit_ids))
            commit_ids_data_frame = pd.DataFrame(commit_ids,
                                                 columns=['Commit_Ids'])
            commit_ids_data_frame.to_csv(
                f"{self.cdp_dump_path}/{CDPConfigValues.commit_ids_file_name}",
                index=False)

            print(
                f"Total Unique Commit IDs to be fetched is {len(commit_ids)}")
            url_list = Utilities.create_url(self.commit_details_url,
                                            commit_ids)

            results = self.web_connection.get_async_data_using_asyncio(
                url_list,
                self.web_constants,
                batch_size=CDPConfigValues.git_api_batch_size)
            commit_details = results[0]
            failed_urls = results[1]
            loop_counter = 1

            while len(failed_urls) > 0 and loop_counter < 20:
                # Exponential backoff: double the wait on each retry pass
                time.sleep(60 * pow(2, loop_counter))

                print(f"Retrying {len(failed_urls)} failed URLs")
                results = self.web_connection.get_async_data_using_asyncio(
                    failed_urls, self.web_constants,
                    CDPConfigValues.git_api_batch_size // 2)
                failed_urls = results[1]
                commit_details = commit_details + results[0]
                loop_counter = loop_counter + 1
            self.commit_details = commit_parser.parse_json(
                commit_details, self.cdp_dump_path)
            self.get_missing_files()

        end_time = time.time()
        print(f"Fetched all commit details in {end_time - start_time}")