def parse_files_using_srcml(repository_id, repository_name):
    """Run srcml over every not-yet-parsed local file version of a repository.

    For each file_versions row with has_parsed_file = false, converts the
    previously checked-out copy into a srcML XML file and flags the row as
    parsed. Commits after every version so an interrupted run can resume.

    Fix: the directory config lookups and create_directory() were executed
    once per file inside the outer loop although they depend only on the
    repository; they are hoisted out. The duplicated path expression is
    built once.
    """
    connection = PSQLConnection.get_connection()
    cursor = connection.cursor()

    # Loop-invariant: both directories depend only on the repository.
    file_versions_directory = DiretoryConfig.get_parameter('file_versions_directory') + repository_name
    parsed_files_directory = DiretoryConfig.get_parameter('parsed_files_directory') + repository_name
    create_directory(parsed_files_directory)

    cursor.execute("select id from files where repository_id = %s", (repository_id, ))
    files_results = cursor.fetchall()
    for file_line in files_results:
        file_id = file_line[0]
        cursor.execute('select id, commit_hash, version_path from file_versions where file_id = %s and has_parsed_file is false order by author_date', (file_id, ))
        file_versions_result = cursor.fetchall()
        for file_versions_line in file_versions_result:
            file_versions_id = file_versions_line[0]
            commit_hash = file_versions_line[1]
            version_path = file_versions_line[2]
            file_extension = version_path.split('.')[-1]
            # Naming convention: <file_id>_<version_id>_<hash>.<ext> — shared
            # between the checked-out copy and the parsed output.
            version_file_name = "/" + str(file_id) + "_" + str(file_versions_id) + "_" + commit_hash + "." + file_extension
            local_file_copy = file_versions_directory + version_file_name
            parsed_file_output = parsed_files_directory + version_file_name
            subprocess.call(["srcml", local_file_copy, "-o", parsed_file_output])
            cursor.execute("update file_versions set has_parsed_file = true where id = %s", (file_versions_id, ))
            # Commit per version so progress is persisted incrementally.
            connection.commit()
    connection.close()
def snapshot_the_repo(repository_name, tag):
    """Check out *tag* and copy the working tree into <repo>_tags/<tag>.

    Fix: the checkout and the copy were joined with ';', so the copy ran
    even when the checkout failed and snapshotted whatever revision happened
    to be in the working tree. '&&' makes the copy conditional on a
    successful checkout. The original also called .strip().decode() on the
    discarded stderr output; the pipe is still drained but the dead
    expression is gone.
    """
    repository_path = DiretoryConfig.get_parameter('repository_directory') + repository_name
    tag_path = repository_path + "_tags/" + tag
    checkout = "git checkout " + tag
    copy_repo = "cp -r ./ ../" + repository_name + "_tags/" + tag
    # Copy only if the checkout succeeded.
    command = checkout + " && " + copy_repo
    process = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               stdin=subprocess.PIPE,
                               shell=True,
                               cwd=repository_path)
    process.communicate()
    clean_unwanted_files(tag_path)
def process_parseable_files(repository_id, repository_name):
    """Walk the cloned repository and register every parseable source file.

    A file is parseable when its name matches the configured
    'parseable_files_regex'. Each match is inserted via insert_file() with
    its path relative to the repository root, and the path is printed.

    Fixes: the regex is compiled once instead of being re-matched from the
    raw pattern for every file; the loop variable no longer shadows the
    'file' builtin; the unused 'file_id' local is dropped.
    """
    repository_path = DiretoryConfig.get_parameter('repository_directory') + repository_name
    file_pattern = re.compile(FileHandlerConfig.get_parameter('parseable_files_regex'))
    for root, dirs, file_names in os.walk(repository_path):
        for file_name in file_names:
            if file_pattern.match(file_name) is not None:
                # Strip the repository prefix to store a repo-relative path.
                absolute_path = os.path.join(root, file_name).replace(repository_path + '/', '')
                insert_file(repository_id, file_name, absolute_path)
                print(absolute_path)
def parse_files_using_srcml(repository_id, repository_name):
    # Purpose: convert every not-yet-parsed local file version into srcML
    # XML (via the external `srcml` binary) and flag the row as parsed.
    connection = PSQLConnection.get_connection()
    cursor = connection.cursor()
    cursor.execute("select id from files where repository_id = %s",
                   (repository_id, ))
    files_results = cursor.fetchall()
    for file_line in files_results:
        file_id = file_line[0]
        # NOTE(review): these config lookups and create_directory() depend
        # only on the repository and could be hoisted out of this loop.
        file_versions_directory = DiretoryConfig.get_parameter(
            'file_versions_directory') + repository_name
        parsed_files_directory = DiretoryConfig.get_parameter(
            'parsed_files_directory') + repository_name
        create_directory(parsed_files_directory)
        cursor.execute(
            'select id, commit_hash, version_path from file_versions where file_id = %s and has_parsed_file is false order by author_date',
            (file_id, ))
        file_vesions_result = cursor.fetchall()
        for file_versions_line in file_vesions_result:
            file_versions_id = file_versions_line[0]
            commit_hash = file_versions_line[1]
            version_path = file_versions_line[2]
            file_extension = version_path.split('.')[-1]
            # Naming convention: <file_id>_<version_id>_<hash>.<ext> for
            # both the checked-out copy and the srcML output.
            local_file_copy = file_versions_directory + "/" + str(
                file_id) + "_" + str(
                    file_versions_id) + "_" + commit_hash + "." + file_extension
            parsed_file_output = parsed_files_directory + "/" + str(
                file_id) + "_" + str(
                    file_versions_id) + "_" + commit_hash + "." + file_extension
            subprocess.call(
                ["srcml", local_file_copy, "-o", parsed_file_output])
            cursor.execute(
                "update file_versions set has_parsed_file = true where id = %s",
                (file_versions_id, ))
            # Commit after each version: progress is persisted incrementally.
            connection.commit()
    connection.close()
def generate_training_dataset():
    """Dump manually classified comments into the NLP training dataset file.

    Selects (classification, treated_comment_text) pairs whose
    classification is one of the configured types and writes them out with
    write_formated_file().

    Fix: the database connection was opened but never closed (resource
    leak); it is now closed in a finally block.
    """
    connection = PSQLConnection.get_connection()
    try:
        cursor = connection.cursor()
        default_nlp_path = DiretoryConfig.get_parameter('nlp_directory')
        training_dataset_path = default_nlp_path + NLPHandlerConfig.get_parameter('training_dataset_name')
        classification_types = NLPHandlerConfig.get_parameter('classification_types')
        # The tuple parameter expands into the SQL "in %s" clause.
        cursor.execute(
            "select classification, treated_comment_text from manually_classified_comments where classification in %s",
            [tuple(classification_types), ])
        write_formated_file(training_dataset_path, cursor.fetchall())
    finally:
        connection.close()
def list_repository_tags(repository_name):
    """Return the repository's tag log, oldest first, one entry per line."""
    repository_path = (DiretoryConfig.get_parameter('repository_directory')
                       + repository_name)
    command = "git log --tags --date-order --reverse --simplify-by-decoration --pretty=%ai%d"
    git_log = subprocess.Popen(command,
                               shell=True,
                               cwd=repository_path,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    raw_stdout = git_log.communicate()[0]
    return raw_stdout.strip().decode("utf-8").split('\n')
def classify_comments(repository_id):
    # Purpose: run the external Stanford classifier over every unclassified
    # comment of each file version and write the predicted
    # td_classification back into processed_comments.
    default_nlp_path = DiretoryConfig.get_parameter('nlp_directory')
    test_dataset_path = default_nlp_path + NLPHandlerConfig.get_parameter('test_dataset_name')
    connection = PSQLConnection.get_connection()
    cursor = connection.cursor()
    cursor.execute("select distinct(file_versions_id) from processed_comments where repository_id = %s", (repository_id, ))
    file_versions = cursor.fetchall()
    for file_version in file_versions:
        before = timeit.default_timer()  # start of per-file-version timing
        file_versions_id = file_version[0]
        print("file version:", file_versions_id)
        # 'WITHOUT_CLASSIFICATION' is a placeholder label column expected by
        # the classifier's input file format.
        cursor.execute("select 'WITHOUT_CLASSIFICATION' as classification, treated_comment_text, id from processed_comments where file_versions_id = %s and td_classification is null order by end_line", (file_versions_id, ))
        all_comments_from_file = cursor.fetchall()
        write_formated_file(test_dataset_path , all_comments_from_file)
        nlp_classifier_memory_use = NLPHandlerConfig.get_parameter('nlp_classifier_memory_use')
        command = 'java ' + nlp_classifier_memory_use + ' -jar stanford-classifier.jar -prop ./dataset.prop -1.useSplitWords -1.splitWordsRegexp "\s"'
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, cwd=default_nlp_path).communicate()
        # The temporary test dataset is deleted as soon as the classifier ran.
        subprocess.call("rm " + test_dataset_path , shell=True)
        output = process[0].strip().decode("utf-8").split('\n')
        # results = process[1].strip().decode("utf-8").split('\n')
        output_regex = NLPHandlerConfig.get_parameter('output_regex')
        comment_text_exact_regex = NLPHandlerConfig.get_parameter('comment_text_exact_regex')
        # Match each DB comment back to its classifier output line by exact
        # comment text, then parse the predicted label out of that line.
        for comment in all_comments_from_file:
            treated_comment_text = comment[1]
            comment_id = comment[2]
            for line in output:
                comment_text_exact_matcher = re.match(comment_text_exact_regex, line)
                # NOTE(review): re.match returns None for non-matching
                # lines, in which case .group(1) raises AttributeError —
                # this assumes every output line matches
                # comment_text_exact_regex. Confirm.
                comment_text_from_output = comment_text_exact_matcher.group(1)
                if treated_comment_text == comment_text_from_output :
                    # NOTE(review): output_without_comment is never used.
                    output_without_comment = line.replace(treated_comment_text, '')
                    output_matcher = re.findall(output_regex, line)
                    # NOTE(review): re.findall always returns a list, never
                    # None, so this guard is always true; an empty match
                    # list would raise IndexError below. Confirm intent.
                    if output_matcher is not None:
                        golden_anwser = output_matcher[0].replace('\'', '')
                        nlp_tool_classification = output_matcher[1].replace('\'', '')
                        cursor.execute("update processed_comments set td_classification = %s where id = %s " , (nlp_tool_classification, comment_id) )
                        connection.commit()
                        # print (golden_anwser , "-" , nlp_tool_classification)
                    # Stop scanning the output once this comment is matched.
                    break
        after = timeit.default_timer()
        print (after - before)
def checkout_file_versions(repository_id, repository_name, master_branch):
    # Purpose: for every file version not yet copied locally, check out its
    # commit and copy the file into the file_versions directory, then mark
    # the row with has_local_file = true.
    repository_directory = DiretoryConfig.get_parameter('repository_directory') + repository_name
    connection = PSQLConnection.get_connection()
    cursor = connection.cursor()
    cursor.execute("select id from files where repository_id = %s", (repository_id, ))
    files_results = cursor.fetchall()
    for file_line in files_results:
        file_id = file_line[0]
        # Reset the working tree to the branch head before walking this
        # file's history of commits.
        checkout_to_latest_version(repository_name, master_branch)
        file_versions_directory = DiretoryConfig.get_parameter('file_versions_directory') + repository_name
        create_directory(file_versions_directory)
        cursor.execute('select id, commit_hash, version_path from file_versions where file_id = %s and has_local_file is false order by author_date', (file_id, ))
        file_vesions_result = cursor.fetchall()
        for file_versions_line in file_vesions_result:
            file_versions_id = file_versions_line[0]
            commit_hash = file_versions_line[1]
            version_path = file_versions_line[2]
            file_extension = version_path.split('.')[-1]
            git_checkout = "git checkout " + commit_hash
            # Copy naming convention: <file_id>_<version_id>_<hash>.<ext>.
            # NOTE(review): the destination is "../" + file_versions_directory
            # relative to the repository cwd — this assumes the configured
            # directory is itself a relative path; confirm against config.
            cp_file = "cp " + version_path + " ../" + file_versions_directory +"/"+ str(file_id)+ "_" + str(file_versions_id) + "_" + commit_hash +"."+ file_extension
            print (cp_file)
            # Checkout and copy run in one shell invocation per version.
            command = git_checkout + ";" + cp_file
            process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr = subprocess.PIPE, stdin = subprocess.PIPE, shell=True, cwd=repository_directory)
            # stdout is read to drain the pipe; the value is not used.
            git_log_output = process.communicate()[0].strip().decode("utf-8").split('\n')
            cursor.execute("update file_versions set has_local_file = true where id = %s", (file_versions_id, ))
            # Commit per version so an interrupted run can resume.
            connection.commit()
    connection.close()
def search_deleted_files(repository_id, repository_name, master_branch):
    # Purpose: scan "git log --diff-filter=D --summary" for files deleted
    # from the repository and register any parseable deleted file — plus its
    # version history up to just before the deleting commit — that the
    # database does not already know about.
    # NOTE(review): master_branch is accepted but never used in this body.
    connection = PSQLConnection.get_connection()
    cursor = connection.cursor()
    repository_directory = DiretoryConfig.get_parameter('repository_directory') + repository_name
    git_deleted_log_file_regex = FileHandlerConfig.get_parameter('git_deleted_log_file_regex')
    file_regex = FileHandlerConfig.get_parameter('parseable_files_regex')
    command = "git log --diff-filter=D --summary"
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr = subprocess.PIPE, stdin = subprocess.PIPE, shell=True, cwd=repository_directory)
    git_log_output = process.communicate()[0].strip().decode("utf-8").split('\n')
    # Running state: the regex captures different log fields on different
    # lines, so the last seen value of each field is carried forward until
    # a delete entry (group 5) is reached.
    commit_hash = ''
    author_name = ''
    author_email = ''
    author_date = ''
    version_path = ''
    for git_log_output_line in git_log_output:
        # removes non ascii characters
        stripped = (c for c in git_log_output_line if 0 < ord(c) < 127)
        stripped_line = ''.join(stripped)
        git_log_file_matcher = re.match(git_deleted_log_file_regex, stripped_line)
        if git_log_file_matcher is not None:
            if git_log_file_matcher.group(1):
                commit_hash = git_log_file_matcher.group(1)
                # print (commit_hash)
            if git_log_file_matcher.group(2):
                author_name = git_log_file_matcher.group(2)
                # print (author_name)
            if git_log_file_matcher.group(3):
                author_email = git_log_file_matcher.group(3)
                # print (author_email)
            if git_log_file_matcher.group(4):
                author_date = git_log_file_matcher.group(4)
                # print (author_date)
            if git_log_file_matcher.group(5):
                # Group 5 carries the deleted file's path.
                version_path = git_log_file_matcher.group(5)
                file_regex_matcher = re.match(file_regex, version_path)
                if file_regex_matcher is not None:
                    # print (version_path)
                    # Only register files not already tracked for this commit.
                    cursor.execute("select count(*) from file_versions where older_version_path = %s and commit_hash = %s", (version_path, commit_hash))
                    found_in_database = cursor.fetchone()[0]
                    if found_in_database == 0:
                        print(found_in_database, version_path, commit_hash)
                        file_name = version_path.split('/')[-1]
                        file_id = insert_file(repository_id, file_name, version_path, commit_hash)
                        if file_id is not None:
                            # "<hash>^" starts the history at the parent of
                            # the deleting commit.
                            execute_git_log_to_get_versions("git log "+commit_hash+"^ --follow --stat=350 --stat-graph-width=2 -- ", file_id, version_path, repository_directory)
def snapshot_the_repo(repository_name, tag):
    """Check out *tag* and copy the working tree into <repo>_tags/<tag>."""
    base_path = DiretoryConfig.get_parameter(
        'repository_directory') + repository_name
    snapshot_path = base_path + "_tags/" + tag
    # Checkout and copy run in one shell invocation inside the repository.
    shell_command = ("git checkout " + tag + " ; " +
                     "cp -r ./ ../" + repository_name + "_tags/" + tag)
    proc = subprocess.Popen(shell_command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            stdin=subprocess.PIPE,
                            shell=True,
                            cwd=base_path)
    proc.communicate()[1].strip().decode("utf-8")
    clean_unwanted_files(snapshot_path)
def extract_file_versions(repository_id, repository_name):
    """Run the git-log version extraction for every registered file."""
    repo_path = DiretoryConfig.get_parameter('repository_directory') + repository_name
    # Kept for parity with the original; the value is not used here.
    git_log_file_regex = FileHandlerConfig.get_parameter('git_log_file_regex')
    connection = PSQLConnection.get_connection()
    cursor = connection.cursor()
    cursor.execute('select id, file_path from files where repository_id = %s',
                   (repository_id, ))
    file_rows = cursor.fetchall()
    connection.close()
    for row_id, row_path in file_rows:
        execute_git_log_to_get_versions(
            "git log --follow --stat=350 --stat-graph-width=2 -- ",
            row_id, row_path, repo_path)
command = 'git clone ' + clone_url process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr = subprocess.PIPE, stdin = subprocess.PIPE, shell=True, cwd=repository_default_directory) proc_stdout = process.communicate()[1].strip().decode('utf-8') print (proc_stdout) repository_name_matcher = re.search('\'(.*)\'', proc_stdout) return repository_name_matcher.group(1) def get_repository_master_branch(repository_name): repository_path = repository_default_directory + repository_name command = 'cat .git/HEAD' process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr = subprocess.PIPE, stdin = subprocess.PIPE, shell=True, cwd=repository_path) proc_stdout = process.communicate()[0].strip() master_branch_name_matcher = re.search('ref:.*\/(.*)', str(proc_stdout)) return master_branch_name_matcher.group(1).replace('\'', '') def insert_cloned_repo_info(repository_name, master_branch): connection = PSQLConnection.get_connection() cursor = connection.cursor() cursor.execute("insert into repositories (name, clone_url, master_branch) values (%s, %s, %s)", (repository_name, clone_url, master_branch)) connection.commit() connection.close() clone_url = sys.argv[1] repository_default_directory = DiretoryConfig.get_parameter('repository_directory') create_repository_directory(repository_default_directory) if has_to_clone_repository(clone_url): repository_name = clone_repository(clone_url) get_repository_master_branch = get_repository_master_branch(repository_name) insert_cloned_repo_info(repository_name, get_repository_master_branch)
def search_authors(repository_id, repository_name):
    # Purpose: for each classified comment text of each file, determine the
    # version (commit/author/date) that introduced it and — if the comment
    # disappears from later versions or the file itself was deleted — the
    # version that removed it. Results are written back to
    # processed_comments.
    # NOTE(review): 'before' is set here but never printed in this body.
    before = timeit.default_timer()
    connection = PSQLConnection.get_connection()
    cursor = connection.cursor()
    cursor.execute("select file_id, treated_comment_text from processed_comments where repository_id = %s and td_classification != 'WITHOUT_CLASSIFICATION' group by 1,2 order by 1 ", (repository_id, ))
    files = cursor.fetchall()
    for file in files:
        file_id = file[0]
        treated_comment_text = file[1]
        print("file id:", file_id)
        print("treated_comment_text:", treated_comment_text)
        # Per-comment state, reset for every (file, comment text) pair.
        iteration_counter = 0
        has_removed_version = False
        is_introduced_version = False
        removed_version_commit_hash = ''
        introduced_version_commit_hash = ''
        introduced_version_processed_comment_id = ''
        cursor.execute("select a.id, b.author_date, b.commit_hash, b.author_name from processed_comments a, file_versions b where a.file_versions_id = b.id and a.file_id = %s and a.treated_comment_text = %s order by 1", (file_id, treated_comment_text))
        all_file_versions = cursor.fetchall()
        for file_version_line in all_file_versions:
            iteration_counter = iteration_counter + 1
            processed_comment_id = file_version_line[0]
            author_date = file_version_line[1]
            commit_hash = file_version_line[2]
            author_name = file_version_line[3]
            # The first version carrying the comment is its introduction.
            if introduced_version_commit_hash == '':
                is_introduced_version = True
                introduced_version_commit_hash = commit_hash
                introduced_version_processed_comment_id = processed_comment_id
            else:
                is_introduced_version = False
            cursor.execute("update processed_comments set introduced_version_commit_hash = %s, is_introduced_version = %s, introduced_version_author = %s, introduced_version_date = %s where id = %s", (introduced_version_commit_hash, is_introduced_version, author_name, author_date, processed_comment_id))
            connection.commit()
            # Removal detection runs only on the last version that still
            # contains the comment.
            if iteration_counter == len(all_file_versions):
                cursor.execute ("select id, commit_hash, author_name, author_date from file_versions where file_id = %s and author_date > %s order by author_date", (file_id, author_date))
                remaining_file_versions = cursor.fetchall()
                if len(remaining_file_versions) > 0:
                    # A later version exists without the comment: the first
                    # such version is the one that removed it.
                    removed_version_commit_hash = remaining_file_versions[0][1]
                    removed_version_author = remaining_file_versions[0][2]
                    removed_version_date = remaining_file_versions[0][3]
                    has_removed_version = True
                    cursor.execute("update processed_comments set removed_version_commit_hash = %s, has_removed_version = %s, removed_version_author = %s, removed_version_date = %s where id = %s", (removed_version_commit_hash, has_removed_version, removed_version_author, removed_version_date, introduced_version_processed_comment_id))
                    connection.commit()
                else:
                    # No later version: the comment may have vanished with
                    # the deletion of the file itself.
                    cursor.execute("select deletion_commit_hash from files where id = %s", (file_id,))
                    file_commit_hash_result = cursor.fetchone()
                    if file_commit_hash_result[0] is not None:
                        repository_directory = DiretoryConfig.get_parameter('repository_directory') + repository_name
                        git_log_file_regex = TDAuthorsHandlerConfig.get_parameter('git_log_file_regex')
                        removed_version_commit_hash = file_commit_hash_result[0]
                        has_removed_version = True
                        # Ask git for the deleting commit's author and date.
                        git_log = "git log -1 " + removed_version_commit_hash
                        process = subprocess.Popen(git_log, stdout=subprocess.PIPE, shell=True, cwd= repository_directory)
                        proc_stdout = process.communicate()[0].strip().decode('utf-8').split('\n')
                        for proc_stdout_line in proc_stdout:
                            git_log_file_matcher = re.match(git_log_file_regex, proc_stdout_line)
                            if git_log_file_matcher is not None:
                                if git_log_file_matcher.group(2):
                                    git_commit_author = git_log_file_matcher.group(2)
                                if git_log_file_matcher.group(4):
                                    git_commit_date = git_log_file_matcher.group(4)
                        # NOTE(review): if no log line matches the regex,
                        # git_commit_author/git_commit_date are unbound here
                        # and this raises NameError — confirm the regex
                        # always matches "git log -1" output.
                        cursor.execute("update processed_comments set removed_version_commit_hash = %s, has_removed_version = %s, removed_version_author = %s, removed_version_date = to_timestamp(%s, 'Dy Mon DD HH24:MI:SS YYYY +-####') where id = %s", (removed_version_commit_hash, has_removed_version, git_commit_author, git_commit_date, introduced_version_processed_comment_id))
                        connection.commit()
                    else:
                        # File still exists and comment persists to the
                        # newest version: record that it was never removed.
                        cursor.execute("update processed_comments set has_removed_version = %s where id = %s", (has_removed_version, introduced_version_processed_comment_id))
                        connection.commit()
def extract_comments(repository_id, repository_name):
    """Extract every comment from the srcML-parsed file versions.

    For each parsed XML file, records the lines of class/interface/enum/
    annotation declarations, then inserts one raw_comments row per comment
    with its computed start/end source lines.

    Fix: when etree.parse() failed, the original only printed the error and
    fell through, so 'root' was either undefined (NameError on the first
    iteration) or stale from the previous iteration — silently attributing
    comments to the wrong file version. The failing version is now skipped.
    The loop-invariant directory lookup is also hoisted out of the loop.
    """
    connection = PSQLConnection.get_connection()
    cursor = connection.cursor()
    # Loop-invariant: depends only on the repository.
    parsed_files_directory = DiretoryConfig.get_parameter('parsed_files_directory') + repository_name
    cursor.execute("select id from files where repository_id = %s ", (repository_id, ))
    files_results = cursor.fetchall()
    for file_line in files_results:
        file_id = file_line[0]
        cursor.execute('select id, commit_hash, version_path from file_versions where file_id = %s and has_parsed_file is true order by author_date', (file_id, ))
        file_versions_result = cursor.fetchall()
        for file_versions_line in file_versions_result:
            # Per-version declaration flags, reset for every file version.
            class_declaration_lines = []
            has_class_declaration = False
            has_interface_declaration = False
            has_enum_declaration = False
            has_annotation_declaration = False
            file_versions_id = file_versions_line[0]
            commit_hash = file_versions_line[1]
            version_path = file_versions_line[2]
            file_extension = version_path.split('.')[-1]
            # Naming convention: <file_id>_<version_id>_<hash>.<ext>
            parsed_file_output = parsed_files_directory + "/" + str(file_id) + "_" + str(file_versions_id) + "_" + commit_hash + "." + file_extension
            print(parsed_file_output)
            try:
                tree = etree.parse(parsed_file_output)
                root = tree.getroot()
            except Exception as e:
                print(e)
                # Skip unparseable files instead of reusing a stale 'root'.
                continue
            # sourceline is 1-based on the XML wrapper; -1 maps it back to
            # the original source file's line numbering.
            for element in root.iter("{http://www.srcML.org/srcML/src}class"):
                class_declaration_lines.append(str(element.sourceline - 1))
                has_class_declaration = True
            for element in root.iter("{http://www.srcML.org/srcML/src}interface"):
                class_declaration_lines.append(str(element.sourceline - 1))
                has_interface_declaration = True
            for element in root.iter("{http://www.srcML.org/srcML/src}enum"):
                class_declaration_lines.append(str(element.sourceline - 1))
                has_enum_declaration = True
            for element in root.iter("{http://www.srcML.org/srcML/src}annotation_defn"):
                class_declaration_lines.append(str(element.sourceline - 1))
                has_annotation_declaration = True
            for element in root.iter("{http://www.srcML.org/srcML/src}comment"):
                start_line = element.sourceline - 1
                comment_text = element.text
                comment_type = element.get("type")
                comment_format = element.get("format")
                if comment_type == 'line':
                    end_line = start_line
                else:
                    # Block comment: srcML gives no end line, so infer it
                    # from the next sibling's start line.
                    next_element = element.getnext()
                    if next_element is not None:
                        end_line = next_element.sourceline - 2
                    else:
                        end_line = start_line
                cursor.execute("insert into raw_comments (repository_id,file_id, file_versions_id, commit_hash, comment_text, comment_type, comment_format, start_line, end_line, has_class_declaration, has_interface_declaration, has_enum_declaration, has_annotation_declaration, class_declaration_lines) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", (repository_id, file_id, file_versions_id, commit_hash, comment_text, comment_type, comment_format, start_line, end_line, has_class_declaration, has_interface_declaration, has_enum_declaration, has_annotation_declaration, ','.join(class_declaration_lines)))
                connection.commit()
    connection.close()
stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, shell=True, cwd=repository_path) proc_stdout = process.communicate()[0].strip() master_branch_name_matcher = re.search('ref:.*\/(.*)', str(proc_stdout)) return master_branch_name_matcher.group(1).replace('\'', '') def insert_cloned_repo_info(repository_name, master_branch): connection = PSQLConnection.get_connection() cursor = connection.cursor() cursor.execute( "insert into repositories (name, clone_url, master_branch) values (%s, %s, %s)", (repository_name, clone_url, master_branch)) connection.commit() connection.close() clone_url = sys.argv[1] repository_default_directory = DiretoryConfig.get_parameter( 'repository_directory') create_repository_directory(repository_default_directory) if has_to_clone_repository(clone_url): repository_name = clone_repository(clone_url) get_repository_master_branch = get_repository_master_branch( repository_name) insert_cloned_repo_info(repository_name, get_repository_master_branch)
def checkout_to_latest_version(repository_name, master_branch):
    """Check out the repository's master branch; return git's stdout lines."""
    repo_path = DiretoryConfig.get_parameter('repository_directory') + repository_name
    checkout_cmd = "git checkout " + master_branch
    proc = subprocess.Popen(checkout_cmd,
                            shell=True,
                            cwd=repo_path,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    raw_stdout = proc.communicate()[0]
    return raw_stdout.strip().decode("utf-8").split('\n')
def delete_training_dataset():
    """Remove the generated NLP training dataset file, if present.

    Fix: the original shelled out with 'rm ' + path under shell=True, which
    breaks on paths containing spaces or shell metacharacters and is a
    shell-injection smell. The argument-list form passes the path as a
    single argv entry with no shell involved.
    """
    default_nlp_path = DiretoryConfig.get_parameter('nlp_directory')
    training_dataset_path = default_nlp_path + NLPHandlerConfig.get_parameter('training_dataset_name')
    # As before, a missing file only yields rm's error message and an
    # ignored nonzero return code (best-effort delete).
    subprocess.call(["rm", training_dataset_path])
def extract_comments(repository_id, repository_name):
    # Purpose: walk every parsed (srcML XML) file version and insert one
    # raw_comments row per comment, together with declaration line info.
    connection = PSQLConnection.get_connection()
    cursor = connection.cursor()
    cursor.execute("select id from files where repository_id = %s ",
                   (repository_id, ))
    files_results = cursor.fetchall()
    for file_line in files_results:
        file_id = file_line[0]
        parsed_files_directory = DiretoryConfig.get_parameter(
            'parsed_files_directory') + repository_name
        cursor.execute(
            'select id, commit_hash, version_path from file_versions where file_id = %s and has_parsed_file is true order by author_date',
            (file_id, ))
        file_vesions_result = cursor.fetchall()
        for file_versions_line in file_vesions_result:
            # Per-version declaration flags, reset for every file version.
            class_declaration_lines = []
            has_class_declaration = False
            has_interface_declaration = False
            has_enum_declaration = False
            has_annotation_declaration = False
            file_versions_id = file_versions_line[0]
            commit_hash = file_versions_line[1]
            version_path = file_versions_line[2]
            file_extension = version_path.split('.')[-1]
            # Naming convention: <file_id>_<version_id>_<hash>.<ext>
            parsed_file_output = parsed_files_directory + "/" + str(
                file_id) + "_" + str(
                    file_versions_id) + "_" + commit_hash + "." + file_extension
            print(parsed_file_output)
            try:
                tree = etree.parse(parsed_file_output)
                root = tree.getroot()
            except Exception as e:
                # NOTE(review): on a parse failure this only prints and
                # falls through — 'root' is then undefined (first
                # iteration) or stale from the previous version. Confirm
                # whether the version should be skipped instead.
                print(e)
            # sourceline is 1-based on the XML; -1 maps back to source lines.
            for element in root.iter("{http://www.srcML.org/srcML/src}class"):
                class_declaration_line = element.sourceline - 1
                class_declaration_lines.append(str(class_declaration_line))
                has_class_declaration = True
            for element in root.iter(
                    "{http://www.srcML.org/srcML/src}interface"):
                class_declaration_line = element.sourceline - 1
                class_declaration_lines.append(str(class_declaration_line))
                has_interface_declaration = True
            for element in root.iter("{http://www.srcML.org/srcML/src}enum"):
                class_declaration_line = element.sourceline - 1
                class_declaration_lines.append(str(class_declaration_line))
                has_enum_declaration = True
            for element in root.iter(
                    "{http://www.srcML.org/srcML/src}annotation_defn"):
                class_declaration_line = element.sourceline - 1
                class_declaration_lines.append(str(class_declaration_line))
                has_annotation_declaration = True
            for element in root.iter(
                    "{http://www.srcML.org/srcML/src}comment"):
                start_line = element.sourceline - 1
                comment_text = element.text
                comment_type = element.get("type")
                comment_format = element.get("format")
                if comment_type == 'line':
                    end_line = start_line
                else:
                    # Block comment: infer the end from the next sibling's
                    # start line (srcML gives no explicit end line).
                    next_element = element.getnext()
                    if next_element is not None:
                        end_line = next_element.sourceline - 2
                    else:
                        end_line = start_line
                cursor.execute(
                    "insert into raw_comments (repository_id,file_id, file_versions_id, commit_hash, comment_text, comment_type, comment_format, start_line, end_line, has_class_declaration, has_interface_declaration, has_enum_declaration, has_annotation_declaration, class_declaration_lines) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                    (repository_id, file_id, file_versions_id, commit_hash,
                     comment_text, comment_type, comment_format, start_line,
                     end_line, has_class_declaration,
                     has_interface_declaration, has_enum_declaration,
                     has_annotation_declaration,
                     ','.join(class_declaration_lines)))
                connection.commit()
    connection.close()
(repository_id, name, version_date, version_order)) connection.commit() connection.close() tags_regex = '(\d\d\d\d\-\d\d\-\d\d\s\d\d:\d\d:\d\d)|\(tag:\s([A-Za-z0-9\-\_\.+]*)\)' repository_list = repositoryfetch_repositories() for repository_entry in repository_list: repository_id = repository_entry[0] repository_name = repository_entry[1] repository_url = repository_entry[2] repository_cloned_date = repository_entry[3] tag_entry_list = list_repository_tags(repository_name) tags_directory = DiretoryConfig.get_parameter( 'repository_directory') + repository_name + "_tags/" create_directory(tags_directory) version_order = 0 for tag_entry in tag_entry_list: if re.search(tags_regex, tag_entry) is not None: matche_groups = re.findall(tags_regex, tag_entry) """It has to have match for tag and date (merge has date but not tag)""" if len(matche_groups) == 2: tag_date = matche_groups[0][0] tag = matche_groups[1][1] create_directory(tags_directory + tag) snapshot_the_repo(repository_name, tag) insert_snapshot_version_info(repository_id, tag, tag_date, version_order)
def list_repository_tags(repository_name):
    """List the repository's tags in date order, oldest first."""
    repo_dir = DiretoryConfig.get_parameter('repository_directory') + repository_name
    tag_log_cmd = ("git log --tags --date-order --reverse "
                   "--simplify-by-decoration --pretty=%ai%d")
    proc = subprocess.Popen(tag_log_cmd,
                            shell=True,
                            cwd=repo_dir,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            stdin=subprocess.PIPE)
    output_bytes = proc.communicate()[0]
    return output_bytes.strip().decode("utf-8").split('\n')
cursor = connection.cursor() cursor.execute("insert into tags (repository_id, name, version_date, version_order) values (%s, %s, to_timestamp(%s, 'YYYY-MM-DD HH24:MI:SS'), %s)", (repository_id, name, version_date, version_order)) connection.commit() connection.close() tags_regex = '(\d\d\d\d\-\d\d\-\d\d\s\d\d:\d\d:\d\d)|\(tag:\s([A-Za-z0-9\-\_\.+]*)\)' repository_list = repositoryfetch_repositories() for repository_entry in repository_list: repository_id = repository_entry[0] repository_name = repository_entry[1] repository_url = repository_entry[2] repository_cloned_date = repository_entry[3] tag_entry_list = list_repository_tags(repository_name) tags_directory = DiretoryConfig.get_parameter('repository_directory') + repository_name + "_tags/" create_directory(tags_directory) version_order = 0 for tag_entry in tag_entry_list: if re.search(tags_regex, tag_entry) is not None: matche_groups = re.findall(tags_regex, tag_entry) """It has to have match for tag and date (merge has date but not tag)""" if len(matche_groups) == 2: tag_date = matche_groups[0][0] tag = matche_groups[1][1] create_directory(tags_directory + tag) snapshot_the_repo(repository_name, tag) insert_snapshot_version_info(repository_id, tag, tag_date, version_order) version_order = version_order + 1