def build_non_adaptive_context():
    """Build a regex for contexts that should not be treated as adaptive.

    The result is one capturing group holding three alternatives:

    1. a term from ``adaptive_context`` followed by whitespace and, within
       50 further characters, a documentation entity or one of a few extra
       words (bug, helper, missing, todo, warning);
    2. a translation/readme keyword located within the first 50 characters
       of the text;
    3. a removal verb followed by whitespace and, within 50 further
       characters, a documentation entity, software term, unneeded term or
       the file scheme.

    ``build_sepereted_term`` is defined elsewhere in the module; it is
    presumably responsible for joining the terms and adding the leading
    term separator -- confirm against its definition.

    Returns:
        str: the regular expression source.
    """
    # Raw string: the backslashes are meant for the regex engine.
    window_template = r'(?:%s)\s[\s\S]{0,50}(?:%s)'

    header_keywords = ['transla(?:tion|et|eted|ets|ting)', 'readme(?:.md)?']
    header_alternative = r'^[\s\S]{0,50}(%s)' % "|".join(header_keywords)

    context_targets = documentation_entities + [
        'bug',
        'helper',
        'miss(?:ing|ed)',
        'to(?: |-)?do(?:s)?',
        'warning(?:s)?',
    ]
    removal_verbs = ['remov(?:e|es|ed|ing)']
    removal_targets = (documentation_entities + software_terms
                       + unnedded_terms + [file_scheme])

    context_alternative = window_template % (
        build_sepereted_term(adaptive_context, just_before=True),
        "|".join(context_targets),
    )
    removal_alternative = window_template % (
        build_sepereted_term(removal_verbs, just_before=True),
        "|".join(removal_targets),
    )

    alternatives = (context_alternative, header_alternative, removal_alternative)
    return '(%s)' % "|".join(alternatives)
def built_is_refactor(commit_text):
    """Decide whether a commit message is classified as a refactor.

    The values returned by ``match`` for the positive patterns (refactor
    regex, removal terms, refactor goals) are added up, the values for the
    negative patterns (non-code perfective work, documentation context and
    the non-positive-linguistic variants of each positive pattern) are
    subtracted, and the commit counts as a refactor when the balance is
    above zero.

    Args:
        commit_text: the commit message to classify.

    Returns:
        The result of comparing the accumulated balance with ``0``.
    """
    removal_re = build_sepereted_term(removal)

    # Positive evidence.
    balance = match(commit_text, build_refactor_regex())
    balance = balance + match(commit_text, removal_re)
    balance = balance + match(commit_text, build_refactor_goals_regex())

    # Negative evidence, evaluated in the same order as the original expression.
    balance = balance - match(commit_text, build_non_code_perfective_regex())
    balance = balance - match(
        commit_text,
        build_documentation_entities_context(build_refactor_regex()))
    balance = balance - match(
        commit_text,
        build_non_positive_linguistic(build_refactor_regex()))
    balance = balance - match(
        commit_text,
        build_non_positive_linguistic(build_sepereted_term(removal)))
    balance = balance - match(
        commit_text,
        build_non_positive_linguistic(build_refactor_goals_regex()))

    return balance > 0
def build_refactor_regex():
    """Build the positive regex for refactor commits.

    Three capturing alternatives are combined:

    * a term from ``refactor_context``;
    * a modification activity followed -- directly after a separator, or
      after up to 50 arbitrary characters enclosed by separators -- by a
      refactor entity and a closing separator;
    * a perfective header action at the very start of the text, or after a
      separator within its first 25 characters, followed by a separator.

    Returns:
        str: the regular expression source.
    """
    header_actions = "|".join(perfective_header_action)
    header_alternative = r'(?:^|^[\s\S]{0,25}%s)(?:%s)%s' % (
        term_seperator,
        header_actions,
        term_seperator,
    )

    activity_prefix = build_sepereted_term(modification_activity,
                                           just_before=True)
    entity_alternatives = "|".join(refactor_entities)
    activity_alternative = r"(?:(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)%s)" % (
        activity_prefix,
        term_seperator,
        term_seperator,
        term_seperator,
        entity_alternatives,
        term_seperator,
    )

    context_alternative = build_sepereted_term(refactor_context)
    return "(%s)|(%s)|(%s)" % (
        context_alternative, activity_alternative, header_alternative)
def non_positive_linguistic_removal_to_bq():
    """Print the non-positive-linguistic removal regex in BigQuery form.

    Writes a ``#`` header line naming the expression, then the output of
    ``regex_to_big_query`` for that expression, to standard output.
    """
    header = "# Refactor :build_non_positive_linguistic(build_sepereted_term(removal))"
    print(header)

    # Built only after the header is printed, as in the original flow.
    removal_re = build_sepereted_term(removal)
    negated_removal_re = build_non_positive_linguistic(removal_re)
    print(regex_to_big_query(negated_removal_re))
def build_documentation_entities_context(positive_re):
    """Build a regex for ``positive_re`` appearing right after a documentation entity.

    Args:
        positive_re: regex source of the positive pattern to qualify.

    Returns:
        str: a non-capturing group matching a documentation entity followed,
        within at most 10 arbitrary characters, by ``positive_re``.
    """
    # TODO - take care of documentation entities spereatly
    doc_prefix = build_sepereted_term(documentation_entities, just_before=True)
    doc_then_positive = r'(?:%s)[\s\S]{0,10}(?:%s)' % (doc_prefix, positive_re)

    # There is a single alternative, so no "|" joining is required.
    return '(?:%s)' % doc_then_positive
def build_adaptive_regex():
    """Build the regex for adaptive commits.

    The pattern is an adaptive context term, one whitespace character, up
    to 50 arbitrary characters, then an adaptive entity or software term
    followed by a term separator.

    Returns:
        str: the regular expression source, wrapped in a capturing group.
    """
    leading_context = build_sepereted_term(adaptive_context, just_before=True)
    target_alternatives = "|".join(adaptive_entities + software_terms)
    pattern_parts = (leading_context, target_alternatives, term_seperator)
    return r"((%s)\s[\s\S]{0,50}(%s)%s)" % pattern_parts
def build_non_positive_linguistic(positive_re):
    """Build a regex for ``positive_re`` used in a non-affirmative phrasing.

    An alternative is produced for each family of cue words -- modals,
    negation terms and a small list of non-actionable verbs
    (forget/allow) -- where the cue is followed, within at most 10
    arbitrary characters, by ``positive_re``.

    Args:
        positive_re: regex source of the positive pattern to qualify.

    Returns:
        str: a non-capturing group with the three alternatives.
    """
    non_actionable_context = [
        'for(?:get|gets|got|geting)',
        'allow(s|ed|ing)?',
    ]
    # TODO - take care of documentation entities spereatly
    # (documentation_entities used to be a fourth cue family here.)
    cue_families = (modals, negation_terms, non_actionable_context)

    cue_then_positive = r'(?:%s)[\s\S]{0,10}(?:%s)'
    alternatives = [
        cue_then_positive % (
            build_sepereted_term(cues, just_before=True), positive_re)
        for cues in cue_families
    ]
    return '(?:%s)' % "|".join(alternatives)
def build_refactor_goals_regex():
    """Build the regex for commits that modify a software goal.

    The pattern is a goal-modification term followed -- directly after a
    separator, or after up to 50 arbitrary characters enclosed by
    separators -- by a software goal and a closing separator.

    Returns:
        str: the regular expression source (non-capturing group).
    """
    goal_verbs = build_sepereted_term(software_goals_modification,
                                      just_before=True)
    goal_names = "|".join(software_goals)
    return r"(?:(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)%s)" % (
        goal_verbs,
        term_seperator,
        term_seperator,
        term_seperator,
        goal_names,
        term_seperator,
    )
def build_bug_fix_regex():
    """Build the regex for bug-fix (corrective) commits.

    Two capturing alternatives are combined: any term from ``bug_terms``,
    or a corrective header entity placed at the very start of the text (or
    after a separator within its first 25 characters) and followed by a
    separator.

    Returns:
        str: the regular expression source.
    """
    header_entities = "|".join(corrective_header_entities)
    header_alternative = r'(?:^|^[\s\S]{0,25}%s)(?:%s)%s' % (
        term_seperator, header_entities, term_seperator)
    # A stricter header limited to "do not" / "don't" at the very start
    # (strict_header) was tried here and is currently disabled.
    term_alternative = build_sepereted_term(bug_terms)
    return "((%s)|(%s))" % (term_alternative, header_alternative)
def build_non_code_perfective_regex():
    """Build the regex for perfective-looking work that is not a code refactor.

    The result is a non-capturing group made of:

    * fixed phrases: "convert ... support", "make ... work",
      "make ... sense" and "improve handling";
    * a non-perfective entity (warning, formatting, indentation) followed
      by a clean-up action;
    * a perfective header action near the start of the text followed,
      within 25 characters, by a documentation entity;
    * a modifier (modification activity, fix, get rid, support) followed by
      a perfective or non-perfective entity.

    Returns:
        str: the regular expression source.
    """
    non_perfective_entities = [
        'warning(?:s)?',
        'format(?:ting)?',
        'indentation(?:s)?',
    ]
    # TODO - applied to perfective entities too here, which is a bug.
    modification_action = ['clean(?:-| )?up(?:s)?']
    extra_modifiers = [
        'fix(?:es|ed|ing)?',
        '(?:get|got|getting) rid',
        'support(?:s|ed|ing)?',
    ]
    modifiers = modification_activity + extra_modifiers

    # Arguments for the recurring "separator, or separator + filler +
    # separator" gap used by several templates below.
    gap_args = (term_seperator, term_seperator, term_seperator)

    modifier_prefix = build_sepereted_term(modifiers, just_before=True)
    all_entities = "|".join(prefective_entities + non_perfective_entities)
    activity_alternative = r"((?:%s)(?:\s|%s[\s\S]{0,50}%s)(?:%s))" % (
        modifier_prefix, term_seperator, term_seperator, all_entities)

    header_actions = "|".join(perfective_header_action)
    doc_terms = build_sepereted_term(documentation_entities)
    doc_header_alternative = (
        r'(?:^|^[\s\S]{0,25}%s)(?:%s)[\s\S]{0,25}(?:%s)'
        % (term_seperator, header_actions, doc_terms)
    )

    convert_support = (
        r'convert(?:ed|s|ing)?(?:%s|%s[\s\S]{0,50}%s)support(?:s|ed|ing)?'
        % gap_args
    )
    make_work = r'(?:make|made|making|makes)(?:%s|%s[\s\S]{0,50}%s)work' % gap_args
    make_sense = r'(?:make|made|making|makes)(?:%s|%s[\s\S]{0,50}%s)sense' % gap_args
    entity_then_cleanup = r'(?:%s)(?:%s|%s[\s\S]{0,50}%s)(?:%s)' % (
        (build_sepereted_term(non_perfective_entities, just_before=True),)
        + gap_args
        + ("|".join(modification_action),)
    )

    phrase_alternatives = "|".join([
        convert_support,
        make_work,
        make_sense,
        'improv(?:e|es|ed|ing) handling',
        entity_then_cleanup,
        doc_header_alternative,
    ])
    return '(?:%s|%s)' % (phrase_alternatives, activity_alternative)
def positive_refactor_to_bq():
    """Print the positive refactor regexes in BigQuery form.

    For each of the three positive patterns (refactor regex, removal
    terms, refactor goals) a ``#`` title line is printed, then -- for all
    but the first -- a ``+`` line, then the output of
    ``regex_to_big_query`` for the pattern.
    """
    # (title, print a leading "+", lazy builder). Builders are invoked only
    # after the title has been printed, matching the original sequence.
    # The first section has no separator line (a "," print is disabled).
    sections = (
        ("# Refactor :build_refactor_regex()",
         False,
         build_refactor_regex),
        ("# Refactor :build_sepereted_term(removal)",
         True,
         lambda: build_sepereted_term(removal)),
        ("# Refactor :build_refactor_goals_regex()",
         True,
         build_refactor_goals_regex),
    )
    for title, with_plus, make_regex in sections:
        print(title)
        if with_plus:
            print("+")
        print(regex_to_big_query(make_regex()))
def build_adaptive_regex(use_conventional_commits=True):
    """Build the regex for adaptive commits.

    The base pattern is an adaptive context term, one whitespace
    character, up to 50 arbitrary characters, then an adaptive entity or
    software term followed by a term separator.

    Args:
        use_conventional_commits: when true, the result also accepts
            whatever ``build_cc_adaptive_regex()`` matches, as a second
            alternative.

    Returns:
        str: the regular expression source.
    """
    leading_context = build_sepereted_term(adaptive_context, just_before=True)
    target_alternatives = "|".join(adaptive_entities + software_terms)
    base_pattern = r"((%s)\s[\s\S]{0,50}(%s)%s)" % (
        leading_context, target_alternatives, term_seperator)

    if not use_conventional_commits:
        return base_pattern
    return "((%s)|(%s))" % (base_pattern, build_cc_adaptive_regex())
def build_bug_fix_regex(use_conventional_commits=True):
    """Build the regex for bug-fix (corrective) commits.

    The alternatives are any term from ``bug_terms`` and a corrective
    header entity placed at the very start of the text (or after a
    separator within its first 25 characters) and followed by a separator.

    Args:
        use_conventional_commits: when true, whatever
            ``build_cc_corrective_regex()`` matches is added as a third
            alternative.

    Returns:
        str: the regular expression source; each alternative is its own
        capturing group inside one outer group.
    """
    header_entities = "|".join(corrective_header_entities)
    header_alternative = r'(?:^|^[\s\S]{0,25}%s)(?:%s)%s' % (
        term_seperator, header_entities, term_seperator)
    # A stricter header limited to "do not" / "don't" at the very start
    # (strict_header) was tried here and is currently disabled.
    term_alternative = build_sepereted_term(bug_terms)

    alternatives = [term_alternative, header_alternative]
    if use_conventional_commits:
        alternatives.append(build_cc_corrective_regex())

    grouped = ["(%s)" % (alternative,) for alternative in alternatives]
    return "(%s)" % "|".join(grouped)
def build_valid_find_regex():
    """Build the regex built from fixing verbs, valid fix objects and valid terms.

    Three capturing alternatives are combined:

    * a fixing verb (or the merge prefix) followed, after 1 to 40
      arbitrary characters, by a valid fix object -- the whole thing
      enclosed by term separators;
    * a valid fix object followed, after up to 40 arbitrary characters
      enclosed by separators, by a fixing verb (or the merge prefix);
    * any term from ``valid_terms``.

    Returns:
        str: the regular expression source.
    """
    verb_group = "(%s)" % "|".join(fixing_verbs + [MERGE_PREFIX])
    object_group = "(%s)" % "|".join(valid_fix_object)

    verb_then_object = "".join([
        term_seperator,
        verb_group,
        r'[\s\S]{1,40}',
        object_group,
        term_seperator,
    ])
    object_then_verb = "".join([
        term_seperator,
        object_group,
        term_seperator,
        r'[\s\S]{0,40}',
        term_seperator,
        verb_group,
        term_seperator,
    ])
    # TODO - check seperation
    # A disabled variant built the object-then-verb form without the leading
    # separator, using a "separator, or separator + up to 40 characters +
    # separator" gap; another joined valid_terms directly with "|".
    other_valid = build_sepereted_term(valid_terms)

    return "((%s)|(%s)|(%s))" % (verb_then_object, object_then_verb, other_valid)
def build_excluded_regex():
    """Return the regex that ``build_sepereted_term`` builds from ``excluded_terms``."""
    excluded_re = build_sepereted_term(excluded_terms)
    return excluded_re
def build_positive_regex():
    """Return the regex that ``build_sepereted_term`` builds from ``positive_terms``."""
    positive_re = build_sepereted_term(positive_terms)
    return positive_re
def build_software_goals_regex():
    """Return the regex that ``build_sepereted_term`` builds from ``software_goals``."""
    goals_re = build_sepereted_term(software_goals)
    return goals_re
def build_excluded_abstraction_regex():
    """Return the regex that ``build_sepereted_term`` builds from ``excluded_abstraction_terms``."""
    excluded_abstraction_re = build_sepereted_term(excluded_abstraction_terms)
    return excluded_abstraction_re
'clean(?:ing|s|ed)?(?:-| )?up(?:s)?', 'cleaner', 'deprecat(?:e|es|ed|ing)', 'extract(?:ed|s|ing)?', 're(?:-|)?organiz(?:e|es|ed|ing)', 're(?:-|)?structur(?:e|es|ed|ing)', 'tid(?:y|ying|ied) up', 'improv(?:e|ed|es|ing|ement|ements)', 're(?:-|)?organiz(?:e|es|ed|ing)', 're(?:-|)?structur(?:e|es|ed|ing)', '(helper|utility|auxiliary) function(?:s)?', '(?:move|moved|moves|moving) to', 'separat(?:e|es|ed|ing)', 'split(?:s|ing)?', '->', build_sepereted_term(static_analyzers) + 'fix(es|ed)?', 'fix(es|ed)?' + build_sepereted_term(static_analyzers) #, '(private|public|protected|static)' ] # TODO - rewrited, move into/out???, deduplicate, remove legacy, redo, PR, feedback # TODO - clean , style, prettier, "->", refine, "Removed commented code", "More startup improvements.", recode # ""Remove another old function", "improved redis error message", utility functions, never used # Checkstyle # TODO - perfective, not refactor - ident, spacing, tabs, "tabs -> spaces", cosmetic, ""*** empty log message ***" # examples ""DOC: remove mention of TimeSeries in docs" # TODO - add "resolving review comments"
valid_terms = [ 'break\sout', 'error(?: |-)?check(ing)?', 'error(?: |-)?handling', 'error message(s)?', 'error report(s|ing)?', 'fixed(?: |-)?point', 'fix(?:ed) ticket(?:s)?', #'format(ing)?', '(?:fix(?:ed|es)?|bug)(?: )?(?: |-|=|:)(?: )?[a-z]{0,3}(?:-)?\d+' + term_seperator, '(if|would)[\s\S]{0,40}go wrong', 'line(?:s)? break(?:s)?', 'typo(s)?\sfix(es)?', 'fix(ed|es|ing)?' + build_sepereted_term(software_entities) + 'name(s)?', build_sepereted_term(static_analyzers) + 'fix(es|ed)?', 'fix(es|ed)?' + build_sepereted_term(static_analyzers), '^### Bug Fix', # tends to be a title, later stating if the commit is a bug fix 'edit the jira link to the correct issue', # Another occurring title 'page(?:s)? break(?:s)?', ] + code_review_fixes fixing_verbs = [ 'correct(?:ing|s|ed)', 'fix(ed|s|es|ing)?', 'repair(?:ing|s|ed)?', 'revert(?:ing|s|ed)?', 'resolv(?:ing|e|es|ed)', 'revok(?:ing|e|es|ed)', 'und(?:oing|id)' ] MERGE_PREFIX = '(merge (branch|pull request).{0,25}|merge (branch|pull request).{0,25}(from|into).{0,25})' END_OF_LINE = r'(\r\n|\r|\n|$)' corrective_header_entities = fixing_verbs + [
def build_negeted_bug_fix_regex():
    """Build the regex for a negated bug-fix mention.

    A negation term followed, within at most 20 arbitrary characters, by
    the bug-fix regex built without the conventional-commits alternative.

    Returns:
        str: the regular expression source.
    """
    # Same call order as before: bug-fix pattern first, then negation terms.
    bug_fix_pattern = build_bug_fix_regex(use_conventional_commits=False)
    negation_pattern = build_sepereted_term(negation_terms)

    negated_template = r"%s[\s\S]{0,20}%s"
    return negated_template % (negation_pattern, bug_fix_pattern)
def build_negeted_bug_fix_regex():
    """Build the regex for a negated bug-fix mention.

    A negation term followed, within at most 20 arbitrary characters, by
    the regex returned from ``build_bug_fix_regex()`` called with its
    default arguments.

    Returns:
        str: the regular expression source.
    """
    # Same call order as before: bug-fix pattern first, then negation terms.
    bug_fix_pattern = build_bug_fix_regex()
    negation_pattern = build_sepereted_term(negation_terms)

    negated_template = r"%s[\s\S]{0,20}%s"
    return negated_template % (negation_pattern, bug_fix_pattern)
def build_core_adaptive_regex():
    """Return the ``core_adaptive_terms`` regex wrapped in a capturing group."""
    core_terms_re = build_sepereted_term(core_adaptive_terms)
    return '(%s)' % (core_terms_re,)
def build_core_abstraction_regex():
    """Return the regex that ``build_sepereted_term`` builds from ``core_abstraction_terms``."""
    core_abstraction_re = build_sepereted_term(core_abstraction_terms)
    return core_abstraction_re
def build_core_refactor_regex():
    """Return the ``core_refactor_terms`` regex wrapped in a capturing group."""
    core_terms_re = build_sepereted_term(core_refactor_terms)
    return '(%s)' % (core_terms_re,)
def build_software_entities_regex():
    """Return the regex that ``build_sepereted_term`` builds from ``software_entities``."""
    entities_re = build_sepereted_term(software_entities)
    return entities_re
def build_formatting_regex():
    """Return the regex that ``build_sepereted_term`` builds from ``formatting_terms``."""
    formatting_re = build_sepereted_term(formatting_terms)
    return formatting_re
def build_core_bug_regex():
    """Return the ``core_bug_terms`` regex wrapped in a capturing group."""
    core_terms_re = build_sepereted_term(core_bug_terms)
    return '(%s)' % (core_terms_re,)
def build_perfective_regex():
    """Return the ``prefective_entities`` regex wrapped in a capturing group."""
    return "(%s)" % (build_sepereted_term(prefective_entities),)
def build_negative_sentiment_excluded_regex():
    """Return the regex that ``build_sepereted_term`` builds from ``excluded_negative_sentiment``."""
    excluded_sentiment_re = build_sepereted_term(excluded_negative_sentiment)
    return excluded_sentiment_re