Example #1
def test_decorate():
    @proto.decorate
    def test():
        r'''Test function

            test 1 x = x + 1
            test 2 x = x - 2
            test x y = 10
        '''

    expected = [
        (1, var('x')),
        (2, var('x')),
        (var('x'), var('y')),
    ]

    actual = [pattern for pattern, _, _ in test.patterns]

    for e, a in zip(expected, actual):
        assert match(e, a)
        assert match(a, e)

    assert test(1, 2) == 3
    assert test(2, 1) == -1
    assert test(5, 6) == 10
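
The docstring above doubles as a dispatch table: each line is a pattern, and a call is routed to the first pattern that unifies with the arguments (literals must be equal, variables bind anything). A minimal sketch of that unification idea; this var/match pair is a hypothetical stand-in modeled on the test's usage, not the actual proto API:

class var:
    # hypothetical pattern variable, modeled on the var('x') used above
    def __init__(self, name):
        self.name = name

def match(pattern, value, bindings=None):
    # unify pattern with value; return a bindings dict on success, None on failure
    bindings = {} if bindings is None else bindings
    if isinstance(pattern, var):
        bindings[pattern.name] = value  # a variable matches anything
        return bindings
    if isinstance(pattern, tuple) and isinstance(value, tuple):
        if len(pattern) != len(value):
            return None
        for p, v in zip(pattern, value):
            if match(p, v, bindings) is None:
                return None
        return bindings
    return bindings if pattern == value else None

assert match((1, var('x')), (1, 5)) == {'x': 5}  # literal 1 matches, x binds to 5
assert match((1, var('x')), (2, 5)) is None      # literal mismatch rejects the pattern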
Example #2
def get_refmod_mine_nlm_matches(model1, model2):
    matches = []

    nodes1temp = commonFunctions.get_nodes(model1, consideredComponents)
    nodes2temp = commonFunctions.get_nodes(model2, consideredComponents)
    nodes1 = create_refmod_nodes(nodes1temp)
    nodes2 = create_refmod_nodes(nodes2temp)

    entries1 = [node_a.labelTokens for node_a in nodes1]
    entries2 = [node_b.labelTokens for node_b in nodes2]

    global wiki_entries
    wiki_entries = fetch_entry_from_wiktionary(entries1 + entries2)

    for n1 in nodes1:
        for n2 in nodes2:
            # identical-label and cross-category conditions both yield a score-1.0 match
            if is_identical_condition(n1, n2) or is_cross_category_condition(n1, n2):
                matches.append(
                    matcher.match(node1=model1.diagram_graph.node[n1.id],
                                  node2=model2.diagram_graph.node[n2.id],
                                  score=1.0))

    return matches
Example #3
def test_decorate_method():
    class TestClass(object):
        def __init__(self, value):
            self.value = value

        @proto.decorate_method
        def get_value():
            r'''Returns first argument, gets instance attribute 'value' otherwise.

                value x = x
                value   = self.value
            '''

    matcher = TestClass.__dict__['get_value']

    expected = [(var('x'), ), tuple()]

    actual = [pattern for pattern, _, _ in matcher.patterns]

    for e, a in zip(expected, actual):
        assert match(e, a)
        assert match(a, e)

    v = TestClass(2)
    assert v.get_value(1) == 1
    assert v.get_value() == 2
Example #4
 def test_matcher_gets_reset_after_complex_match(self):
     matcher = self.example_comment_matcher()
     self.assertEqual(
         ' first comment ',
         matcher.match('/* first comment */').raw_value)
     self.assertEqual(
         ' second comment ',
         matcher.match('/* second comment */').raw_value)
Example #5
def main():
    st.title("Dataset")
    data, titles, texts = utils.load_data()  # careful: loads 12 GB of data
    preprocess_wiki_text(data, "preped_wikitexts.jsonl")  # prep the text from Wikipedia
    get_infobox_data(
        "./data/preped_wikitexts.jsonl",
        "./data/matched_texts.jsonl")  # map articles to their infoboxes
    # or, for a speedup: get_infobox_data_multi("./data/preped_wikitexts.jsonl", "./data/matched_texts.jsonl")
    matcher.match(
        "./data/matched_texts.jsonl", "./data/train_data.jsonl"
    )  # match sentences with triples (infobox values) and save to ./data as JSONL
Example #6
def tampilDeadline(usrMsg):
    deadline = db.getList_Daftar_Tugas_Status(False)
    for i in range(len(deadline)):
        # generic check for "tugas" (tasks)
        if(matcher.match(usrMsg,"tugas")):
            if(matcher.match(usrMsg,deadline[i][2].lower())):
                return deadline[i][1]
        # specific check: tucil, tubes, or pr
        else:
            if(matcher.match(usrMsg,deadline[i][2].lower()) and matcher.match(usrMsg,deadline[i][3].lower())):
                return deadline[i][1]

    return "Tidak ada deadline itu"
Example #7
def callback_boxes(message):
    d = jsonpickle.decode(message.data)
    print("@", time.time(), "\n", d)

    matched = match(d, clusters)

    fus_pub.publish(jsonpickle.encode(matched))
Example #8
 def test_link_is_encoded_as_phrase(self):
     links_match = []
     for para in paras_from('bookmark with link.docx'):
         phrase_contents = in_para_allcontent.pick_contents(
             in_para_allcontent.contentlist(para), lambda x: x["type"] == "phrase")
         for content in phrase_contents:
             links_match.append(
                 matcher.match(in_para_phrase.content_regex, content))
     self.assertAllAreOk(links_match)
Example #9
 def test_bookmark_is_encoded_as_anchor(self):
     anchors_match = []
     for para in paras_from('anchor.docx'):
         anchor_contents = in_para_allcontent.pick_contents(
             in_para_allcontent.contentlist(para), lambda x: x['type'] == "anchor")
         for content in anchor_contents:
             anchors_match.append(
                 matcher.match(in_para_bookmark.content_regex, content))
     self.assertAllAreOk(anchors_match)
Example #10
 def test_reference_is_encoded_as_external(self):
     extrefs_match = []
     for para in paras_from('externalref.docx'):
         extref_contents = in_para_allcontent.pick_contents(
             in_para_allcontent.contentlist(para), lambda x: x["type"] == "extref")
         for content in extref_contents:
             extrefs_match.append(
                 matcher.match(in_para_externalref.content_regex, content))
     self.assertAllAreOk(extrefs_match)
Example #11
 def test_link_to_html_is_encoded_as_phrase(self):
     links_match = []
     para_with_link = paras_from('link to html.docx')[0]
     phrase_contents = in_para_allcontent.pick_contents(
         in_para_allcontent.contentlist(para_with_link), lambda x: x["type"] == "phrase")
     for content in phrase_contents:
         links_match.append(
             matcher.match(in_para_phrase.content_regex, content))
     self.assertAllAreOk(links_match)
Example #12
def daftar_katakunci(text):
    data = []
    textlist = text.split(" ")
    for kata in bd.getList_Kata_Penting()[1:]:
        for i in range(len(textlist)):
            index = matcher.match(textlist[i].lower(), kata.lower())
            if (index and len(kata) >= len(textlist[i])):
                data.append(kata)
                break
        if (index):
            break

    for kata in bd.getList_Kata_Tampil_Deadline():
        for i in range(len(textlist)):
            index = matcher.match(textlist[i].lower(), kata.lower())
            if (index):
                data.append(kata)

    return data
Example #13
def process(usrMsg):
    result = at.ValidasiInput(usrMsg)
    if(result =="-1"):
        text = str(usrMsg).lower()

        #Menampilkan help
        for pattern in kata_help:
            if (matcher.match(text, pattern)) :
                return help()

        #Menandai task selesai
        for pattern in kata_task_selesai:
            if (matcher.match(text, pattern)) :
                return tandaiTask(text)    

        #Menampilkan tanggal deadline suatu task
        for pattern in kata_tampil_deadline:
            if (matcher.match(text, pattern)) :
                return tampilDeadline(text)

        kata_penting = db.getList_Kata_Help()
        kata_penting += db.getList_Kata_Tampil_Deadline()
        kata_penting += db.getList_Kata_Task_Selesai()

        kata_input = text.split(" ")
        found = False
        for kata in kata_input:
            if kata not in kata_penting:
                for pattern in kata_penting:
                    if matcher.similarity(pattern, kata) >= 0.75:
                        text = text.replace(kata, pattern)
                        found = True

        if found:
            return "Mungkin maksud kamu:\n" + text

        return "Maaf, pesan tidak dikenali"
        
    else:
        return result
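
The fallback above suggests a correction by swapping each unrecognized word for a known keyword once matcher.similarity clears 0.75. A minimal sketch of the same idea, using difflib.SequenceMatcher as a stand-in for matcher.similarity:

import difflib

def suggest(text, keywords, threshold=0.75):
    # replace each unknown word with the first keyword similar enough to it
    words = text.split(" ")
    for i, word in enumerate(words):
        if word in keywords:
            continue
        for kw in keywords:
            if difflib.SequenceMatcher(None, kw, word).ratio() >= threshold:
                words[i] = kw
                break
    return " ".join(words)

print(suggest("tampilkan deadlin tubes", ["deadline", "tubes", "help"]))
# -> "tampilkan deadline tubes"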
Example #14
def diundurTask(usrMsg):
    found = False
    text = str(usrMsg).split(" ")
    for i in range(len(text)):
        if (matcher.match(text[i], "undur") or matcher.match(text[i], "ubah")):

            if (len(text[i + 2]) > 2):
                tanggal = text[i + 2]

            else:
                bulan_int = bulan.get(text[i + 3].lower())
                tanggal = text[i + 2] + "/" + bulan_int + "/" + text[i + 4]

            (tgl, bln, th) = re.split("/", tanggal)
            date = datetime.date(int(th), int(bln), int(tgl))
            bd.update_Daftar_Tugas(text[i - 1], date)
            found = True
            output = "Deadline Tugas ID " + text[i - 1] + "<br>"
            output += "berhasil diperbarui menjadi " + str(date) + " <br>"
            return output
    if (not found):
        return "-1"
Example #15
def prune_results(res_1, res_2, threshold=3.0):
    to_return = set()
    res_1_all_values = []
    for value in res_1.values():
        res_1_all_values = res_1_all_values + value

    res_2_all_values = []
    for value in res_2.values():
        res_2_all_values = res_2_all_values + value

    for entity in res_1.keys():
        do_rule_3 = len(res_1[entity])
        for match in res_1[entity]:
            # RULE 1
            if match in res_1_all_values and match in res_2_all_values:
                to_return.add(
                    matcher.match(node1=entity.node, node2=get_node_from_entities(match.id2, res_2.keys()), score=1.0))
            # RULE 2
            elif match in res_1_all_values or match in res_2_all_values:
                if match.score >= threshold:
                    to_return.add(
                        matcher.match(node1=entity.node, node2=get_node_from_entities(match.id2, res_2.keys()),
                                      score=1.0))
            else:
                do_rule_3 = do_rule_3 - 1
        # RULE 3
        if do_rule_3 == 0:
            for match in get_two_best(res_1[entity]):
                to_return.add(
                    matcher.match(node1=entity.node, node2=get_node_from_entities(match.id2, res_2.keys()), score=1.0))

    for entity in res_2.keys():
        do_rule_3 = len(res_2[entity])
        for match in res_2[entity]:
            # RULE 1
            if match in res_1_all_values and match in res_2_all_values:
                to_return.add(
                    matcher.match(node1=entity.node, node2=get_node_from_entities(match.id2, res_1.keys()), score=1.0))
            # RULE 2
            elif match in res_1_all_values or match in res_2_all_values:
                if match.score >= threshold:
                    to_return.add(
                        matcher.match(node1=entity.node, node2=get_node_from_entities(match.id2, res_1.keys()),
                                      score=1.0))
            else:
                do_rule_3 = do_rule_3 - 1
        # RULE 3
        if do_rule_3 == 0:
            for match in get_two_best(res_2[entity]):
                to_return.add(
                    matcher.match(node1=entity.node, node2=get_node_from_entities(match.id2, res_1.keys()), score=1.0))

    return to_return
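
In short: RULE 1 keeps reciprocal matches outright, RULE 2 keeps one-sided matches only when they clear the score threshold, and RULE 3 falls back to an entity's two best matches when neither rule fired for any of its candidates. A compact restatement of the per-match decision, with hypothetical inputs:

def keep_match(score, in_res1, in_res2, threshold=3.0):
    if in_res1 and in_res2:  # RULE 1: present in both result sets
        return True
    if in_res1 or in_res2:   # RULE 2: one-sided, needs score >= threshold
        return score >= threshold
    return False             # counts toward triggering RULE 3's two-best fallback

assert keep_match(1.0, True, True)       # reciprocal: kept regardless of score
assert not keep_match(2.5, True, False)  # one-sided, below threshold: dropped
assert keep_match(3.5, False, True)      # one-sided, above threshold: kept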
Example #16
def getMatchSSSmatches(diagram1, diagram2, threshold=0.5):
    # bag has structure: {nodeId: normalizedLabel}
    bag1 = extract_normalize_step(diagram1, activeComponents)

    bag2 = extract_normalize_step(diagram2, activeComponents)

    matches_alpha = calculate_similarity_step(bag1, bag2)

    # Identify step
    matches_final = []

    for m in matches_alpha:
        if m.similiraty_score >= threshold:
            matches_final.append(
                matcher.match(node1=diagram1.diagram_graph.node[m.id1],
                              node2=diagram2.diagram_graph.node[m.id2],
                              score=m.similiraty_score))

    return matches_final
Example #17
def student_listing_matcher(student, listing):
    """
    Match student to listing by student fields and desired listing fields
    :param student: Student obj
    :param listing: Listing obj
    :return: ratio between 0 and 1
    """
    same_job_type = any(i in listing.job_type for i in student.looking_for)
    if not same_job_type:
        return 0

    if not student.skills:
        return 0

    skill_ratio = matcher.match(student.skills, listing.desired_skills)
    return skill_ratio
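
A hypothetical invocation; SimpleNamespace stands in for the Student and Listing objects, and matcher.match is assumed to return a ratio between 0 and 1 over the two skill lists:

from types import SimpleNamespace

student = SimpleNamespace(looking_for=["internship"], skills=["python", "sql"])
listing = SimpleNamespace(job_type=["internship", "full-time"],
                          desired_skills=["python", "sql", "git"])

# returns 0 early if job types don't overlap or the student lists no skills;
# otherwise defers to matcher.match on the two skill lists
ratio = student_listing_matcher(student, listing)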
Example #18
def antaraTanggal_Jenis(text, jenis):
    data = []
    textlist = text.split(" ")
    for i in range(len(textlist)):
        index = matcher.match(textlist[i].lower(), "antara")
        if (index):
            if len(textlist[i + 1]) > 2:
                data.append(textlist[i + 1])
                data.append(str(textlist[i + 3]).replace("?", ""))

            else:
                bulan_int = bulan.get(textlist[i + 1].lower())
                tanggal = textlist[i + 1] + "/" + bulan_int + "/" + textlist[i + 3]
                data.append(tanggal)

                bulan_int = bulan.get(textlist[i + 3].lower())
                tanggal = textlist[i + 1] + "/" + bulan_int + "/" + textlist[i + 3]
                data.append(tanggal)

            break
    if (len(data) == 2):
        (tgl, bln, th) = re.split("/", data[0])
        date1 = datetime.date(int(th), int(bln), int(tgl))
        (tgl, bln, th) = re.split("/", data[1])
        date2 = datetime.date(int(th), int(bln), int(tgl))
        # TODO: change this to return a list instead of a formatted string
        output = "[Menampilkan daftar " + jenis + str(date1) + " - " + str(
            date2) + "] <br>"
        daftar = bd.getList_Daftar_Tugas_Jenis_tgl(jenis, date1, date2, False)
        if (len(daftar) == 0):
            return "Tidak ada " + jenis + " antara " + str(
                date1) + " - " + str(date2)

        for tugas in daftar:
            output += "(ID: " + tugas[0] + ") " + tugas[1] + " " + tugas[
                2] + " " + tugas[3] + " <br>"

        return output
    else:
        return "-1"
Example #19
    def filelist(self, roots, **kwargs):
        '''
        Parameters
        ----------
        roots:
            file_roots, pillar_roots, cache_roots, etc., to walk

        kwargs:
            Contains any extra variables to pass to element
        '''
        for root, abspath in walk(roots):
            element = self.element(root, abspath, **kwargs)

            if self.match_each and not all(
                    matcher.match([element], self.pattern)):
                continue

            self.add_element(element, **kwargs)

        return self.as_sequence
Example #20
def get_matches_by_combine(activity_pairs_all, bots_results, bot_thresholds,
                           best, second_best, model_pairs):
    final_matches = dict()
    for pair in model_pairs:
        final_matches[pair] = []

    for act_pair in activity_pairs_all:
        # bots_results[best][act_pair] gives the similarity score of act_pair under the best BOT configuration
        if bots_results[best][act_pair] >= bot_thresholds[best][0] or bots_results[second_best][act_pair] >= \
                bot_thresholds[second_best][0]:
            sim_score = max(bots_results[best][act_pair],
                            bots_results[second_best][act_pair])
            node1 = act_pair.model_pair.bpmn1.diagram_graph.node[
                act_pair.node1_id]
            node2 = act_pair.model_pair.bpmn2.diagram_graph.node[
                act_pair.node2_id]
            final_matches[act_pair.model_pair].append(
                matcher.match(node1=node1, node2=node2, score=sim_score))

    return final_matches
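
The combine rule above keeps an activity pair when either of the two best BOT configurations clears its own threshold, and records the larger of the two scores. With hypothetical numbers:

best_score, second_score = 0.55, 0.82    # hypothetical similarity scores
best_thresh, second_thresh = 0.60, 0.80  # hypothetical per-configuration thresholds

keep = best_score >= best_thresh or second_score >= second_thresh  # True
sim_score = max(best_score, second_score)                          # 0.82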
Example #21
def get_triple_s_matches(bpmn1,
                         bpmn2,
                         syntactic_weight=0.5,
                         semantic_weight=0.35,
                         ratio_weight=0.05,
                         position_weight=0.1,
                         threshold=0.5):
    matches = []
    nodes1 = commonFunctions.get_nodes(bpmn1, activeComponents)
    nodes2 = commonFunctions.get_nodes(bpmn2, activeComponents)
    graph1 = commonFunctions.get_graph_with_id_nodes(bpmn1)
    graph2 = commonFunctions.get_graph_with_id_nodes(bpmn2)

    for node_a in nodes1:
        for node_b in nodes2:
            l1 = node_a['node_name'].lower()
            l2 = node_b['node_name'].lower()
            syntactic_score = get_syntactic_score(l1, l2)
            semantic_score = get_semantic_score(l1, l2)

            structural_score = get_structural_score(
                node_a,
                node_b,
                graph_a=graph1,
                graph_b=graph2,
                bpmn_a=bpmn1,
                bpmn_b=bpmn2,
                ratio_weight=ratio_weight,
                position_weight=position_weight)

            final_score = syntactic_weight * syntactic_score + semantic_weight * semantic_score + structural_score

            if final_score >= threshold:
                matches.append(
                    matcher.match(node1=node_a,
                                  node2=node_b,
                                  score=final_score))

    return matches
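
Note that the structural component arrives pre-weighted (ratio_weight and position_weight are passed into get_structural_score), so it is added as-is. A worked example with the default weights and hypothetical component scores:

syntactic_score, semantic_score, structural_score = 0.8, 0.6, 0.12  # hypothetical

final_score = 0.5 * syntactic_score + 0.35 * semantic_score + structural_score
# 0.40 + 0.21 + 0.12 = 0.73, which clears the default threshold of 0.5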
Example #22
def haritask(text):
    found = False
    textlist = text.split(" ")
    for i in range(len(textlist)):
        index = matcher.match(textlist[i].lower(), "hari")
        if (index):
            found = True
            N = int(textlist[i - 1])
            dateEnd = nHariKedepan(N)
            output = "[Menampilkan Tugas " + str(N) + " hari ke depan]<br>"
            daftar = bd.getList_Daftar_Tugas_tgl(datetime.date.today(),
                                                 dateEnd, 0)

            if (len(daftar) == 0):
                return "Tidak ada deadline " + str(N) + " hari ke depan"
            for tugas in daftar:
                output += "(ID: " + tugas[0] + ") " + tugas[1] + " " + tugas[
                    2] + " " + tugas[3] + " <br>"
            return output
    if (not found):
        return "-1"
Example #24
def minggutask_Jenis(text, jenis):
    found = False
    textlist = text.split(" ")
    for i in range(len(textlist)):
        index = matcher.match(textlist[i].lower(), "minggu")
        if (index):
            found = True
            N = int(textlist[i - 1])
            dateEnd = nHariKedepan(N * 7)
            output = "[Menampilkan " + jenis + " " + str(
                N) + " minggu ke depan]<br>"
            daftar = bd.getList_Daftar_Tugas_Jenis_tgl(jenis,
                                                       datetime.date.today(),
                                                       dateEnd, 0)

            if (len(daftar) == 0):
                return "Tidak ada " + jenis + " " + str(N) + " minggu ke depan"
            for tugas in daftar:
                output += "(ID: " + tugas[0] + ")" + tugas[1] + " " + tugas[
                    2] + " " + tugas[3] + "<br>"
            return output

    if (not found):
        return "-1"
Example #25
def parse(lines):
	"""Parses the given text lines and returns an AST that represents the simple
HTML document from the text.  Raises a ParseError if parsing fails.  Raises a
TokenizeError if tokenizing fails."""
	return match(SimpHtmlParser().parse(lines))
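
A hypothetical call, passing the document as a list of text lines:

ast = parse([
    "<html><body>",
    "<p>hello world</p>",
    "</body></html>",
])  # raises ParseError or TokenizeError on malformed input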
Example #26
import matcher

classSize = 4


matcher.studentMaker('A', 90, 'male', 5, 4, 4, [2], [4])
matcher.studentMaker('B', 90, 'female', 5, 4, 4, [1], [])
matcher.studentMaker('C', 65, 'male', 3, 2, 1, [4], [])
matcher.studentMaker('D', 65, 'female', 3, 2, 1, [3], [1])

# expected groups: A with C, B with D
matcher.groupMaker(classSize)

matcher.match()
matcher.printMatch()
Example #27
numPics = len(imageFiles)
print(numPics)

button_pressed = 1
button_released = 0
i = 0

while True:
    try:
        button_status = controller.check_button()
        if button_status == button_pressed:
            print("Got new photo!")
            controller.turn_light_on()
            if (i >= numPics):
                i = 0
            imagePath = imageFiles[i]
            print("Matching photo " + imagePath)
            template = cv2.imread('badguy.jpg')
            matcher.match(imagePath, template)
            i = i + 1
            button_status = button_released
        else:
            controller.turn_light_off()
    except (KeyboardInterrupt, SystemExit):
        controller.turn_light_off()
        controller.turn_buzzer_off()
        sys.exit()
    except (IOError, TypeError) as e:
        print("Error:", e)
Example #28
 def test_matches_and_has_remaining_text(self):
     matcher = self.example_string_matcher()
     matcher.match("'foo\\'s bar and fig\\'s foo' and stuff")
     self.assertEqual(
         " and stuff",
         matcher.remaining_text)
Example #29
 def test_match_for_complex_delims_remaining_text(self):
     matcher = self.example_comment_matcher()
     matcher.match('/* comment */this text remains')
     self.assertEqual('this text remains', matcher.remaining_text)
Example #30
 def test_match_calculates_correct_remaining_text(self):
     matcher = self.example_exact_literal_matcher()
     matcher.match('->later text')
     self.assertEqual('later text', matcher.remaining_text)
Example #31
 def test_matches_has_correct_remaining_value(self):
     matcher = self.example_while_matcher()
     matcher.match("foobar and stuff")
     self.assertEqual(
         " and stuff",
         matcher.remaining_text)
Example #32
def validate_model(
    prob_thresh=load_config()["machine_learning"]["prboability_thresholds"]["general"],
    test=False
):
    """Compares new model with status quo production model and compiles/reports the results.
    Based on results, will either replace model and archive old one or just maintain status quo.
    
    Parameters:
     - `prob_thresh` (float): probability threshold which the classifier will use to determine
     whether or not there is a match.
     - `test` (bool): whether in testing or not, will dtermine flow of operations and mute emails appropriately.

    """
    match_query = """
        SELECT
            company_projects.job_number,
            company_projects.city,
            company_projects.address,
            company_projects.title,
            company_projects.owner,
            company_projects.contractor,
            company_projects.engineer,
            company_projects.address_lat,
            company_projects.address_lng,
            company_projects.receiver_emails_dump,
            web_certificates.url_key,
            web_certificates.cert_id,
            attempted_matches.ground_truth,
            attempted_matches.multi_phase,
            web_certificates.pub_date,
            web_certificates.source,
            CONCAT(base_urls.base_url, web_certificates.url_key) AS link
        FROM
            web_certificates
        LEFT JOIN
            attempted_matches
        ON
            web_certificates.cert_id = attempted_matches.cert_id
        LEFT JOIN
            company_projects
        ON
            attempted_matches.project_id = company_projects.project_id
        LEFT JOIN
            base_urls
        ON
            base_urls.source = web_certificates.source
        WHERE 
            company_projects.closed=1
        AND
            attempted_matches.ground_truth=1
        AND 
            attempted_matches.multi_phase=0
        AND 
            attempted_matches.validate=1
    """
    corr_web_certs_query = """
        SELECT
            web_certificates.*
        FROM 
            web_certificates
        LEFT JOIN
            attempted_matches
        ON
            web_certificates.cert_id = attempted_matches.cert_id
        LEFT JOIN
            company_projects
        ON
            attempted_matches.project_id = company_projects.project_id
        LEFT JOIN
            base_urls
        ON
            base_urls.source = web_certificates.source
        WHERE 
            company_projects.closed=1
        AND
            attempted_matches.ground_truth=1
        AND 
            attempted_matches.multi_phase=0
        AND 
            attempted_matches.validate=1
    """
    with create_connection() as conn:
        validate_company_projects = pd.read_sql(match_query, conn)
        validate_web_df = pd.read_sql(corr_web_certs_query, conn)
    new_results = match(
        version="new",
        company_projects=validate_company_projects,
        df_web=validate_web_df,
        test=True,
        prob_thresh=prob_thresh,
    )
    analysis_df = pd.merge(
        new_results[['job_number', 'cert_id', 'pred_prob', 'pred_match', 'total_score']],
        validate_company_projects[['job_number', 'cert_id', 'ground_truth']],
        how='left',
        on=['job_number', 'cert_id']
    )
    analysis_df['ground_truth'] = analysis_df.ground_truth.apply(lambda x: 1 if x == 1.0 else 0)
    tp = len(analysis_df[(analysis_df.pred_match == 1) & (analysis_df.ground_truth == 1)])
    fp = len(analysis_df[(analysis_df.pred_match == 1) & (analysis_df.ground_truth == 0)])
    tn = len(analysis_df[(analysis_df.pred_match == 0) & (analysis_df.ground_truth == 0)])
    fn = len(analysis_df[(analysis_df.pred_match == 0) & (analysis_df.ground_truth == 1)])
    if fn:
        logger.warning(f"match for project #{list(analysis_df[(analysis_df.pred_match == 0) & (analysis_df.ground_truth == 1)]['job_number'])} was not detected.")
    logger.info(f"true postives: {tp}")
    logger.info(f"false postives: {fp}")
    logger.info(f"true negatives: {tn}")
    logger.info(f"false negatives: {fn}")
    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    logger.info(f"recall: {recall}")
    logger.info(f"precision: {precision}")
    min_prob = min(analysis_df[analysis_df.ground_truth == 1.0]['pred_prob'])
    logger.info(f"minimum probability threshhold to acheive 100% recall: {min_prob}")
    analysis_df['adj_pred_match'] = analysis_df.pred_prob.apply(lambda x: x >= min_prob)
    avg_prob = mean(analysis_df[analysis_df.ground_truth == 1.0]['pred_prob'])
    logger.debug(analysis_df[analysis_df.adj_pred_match])
    signal_and_noise = analysis_df[analysis_df.pred_prob > -0.1]
    signal = signal_and_noise[signal_and_noise.ground_truth == 1.0]['pred_prob']
    noise = signal_and_noise[signal_and_noise.ground_truth != 1.0]['pred_prob']
    interval = 0.1
    bottom_ranges = np.arange(0, 1, interval)
    ground_truths, false_matches = [], []
    for bottom_range in bottom_ranges:
        bottom_range = round(bottom_range, 1)
        upper_range = round((bottom_range + interval), 1)
        if bottom_range == 0.0:  # capture all the false matches scored at exactly 0
            bottom_range = -0.1
        ground_truths.append(len([value for value in signal if value <= upper_range and value > bottom_range]))
        false_matches.append(len([value for value in noise if value <= upper_range and value > bottom_range]))
    df = pd.DataFrame({
        'probability score' : bottom_ranges,
        'true match' : ground_truths,
        'false match' : false_matches
    })
    p1 = plt.bar(df['probability score'], df['true match'], width=0.07, align='edge', color=(112/255, 94/255, 204/255, 1))
    p2 = plt.bar(df['probability score'], df['false match'], width=0.07, align='edge', bottom=df['true match'], color=(112/255, 94/255, 134/255, 1))
    t = plt.axvline(x=prob_thresh, color=(70/255, 70/255, 80/255, 1), linestyle='--')
    plt.ylabel('# of matches')
    plt.xlabel('predicted probability of match')
    ax = plt.axes()
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    # ax.set_yscale('log', nonposy='clip')  # too glitchy to use
    plt.xticks([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1])
    plt.title('Precision Spread on Validation Data\n')
    plt.legend((p1[0], p2[0]), ('true match', 'false match'))
    # ax = plt.axes()
    # for spine in ax.spines:
    #     ax.spines[spine].set_visible(False)
    legend = plt.legend((p1[0], p2[0], t), ('true match', 'false match', 'decision threshold'), frameon=1)
    frame = legend.get_frame()
    frame.set_alpha(0)
    if not test:  # will also display inside jupyter notebook regardless (if %matplotlib inline)
        plt.savefig('static/precision_spread.png', transparent=True, dpi=300)
    if recall < 1.0:
        adj_tp = len(analysis_df[(analysis_df.adj_pred_match == 1) & (analysis_df.ground_truth == 1)])
        adj_fp = len(analysis_df[(analysis_df.adj_pred_match == 1) & (analysis_df.ground_truth == 0)])
        adj_tn = len(analysis_df[(analysis_df.adj_pred_match == 0) & (analysis_df.ground_truth == 0)])
        adj_fn = len(analysis_df[(analysis_df.adj_pred_match == 0) & (analysis_df.ground_truth == 1)])
        logger.info(f"adjusted true postives: {adj_tp}")
        logger.info(f"adjusted false postives: {adj_fp}")
        logger.info(f"adjusted true negatives: {adj_tn}")
        logger.info(f"adjusted false negatives: {adj_fn}")
        adj_recall = adj_tp / (adj_tp + adj_fn)
        adj_precision = adj_tp / (adj_tp + adj_fp)
        logger.info(f"adjusted recall: {adj_recall}")
        logger.info(f"adjusted precision: {adj_precision}")
        logger.info(f"Would have had {adj_fp} false positives ({adj_precision}% precision) if threshold was adjusted down to acheive 100%")
    try:
        sq_results = match(
            version="status_quo",
            company_projects=validate_company_projects,
            df_web=validate_web_df,
            test=True,
            prob_thresh=prob_thresh,
        )
    except FileNotFoundError:
        logger.info(
            "could not find any status quo models to use for baseline validation."
        )
        if not test:
            logger.info("adopting new model by default and skipping rest of validation")
            for filename in ["rf_model.pkl", "rf_features.pkl"]:
                os.rename("new_" + filename, filename)
            return  # exit function because there is no baseline to validate against
        else:
            logger.info(
                "will keep testing validation using new model as baseline. Just for testing purposes."
            )
            sq_results = match(
                version="new",
                company_projects=validate_company_projects,
                df_web=validate_web_df,
                test=True,
                prob_thresh=prob_thresh,
            )
    sq_analysis_df = pd.merge(
        sq_results[['job_number', 'cert_id', 'pred_prob', 'pred_match', 'total_score']],
        validate_company_projects[['job_number', 'cert_id', 'ground_truth']],
        how='left',
        on=['job_number', 'cert_id']
    )
    sq_analysis_df['ground_truth'] = sq_analysis_df.ground_truth.apply(lambda x: 1 if x == 1.0 else 0)
    sq_tp = len(sq_analysis_df[(sq_analysis_df.pred_match == 1) & (sq_analysis_df.ground_truth == 1)])
    sq_fp = len(sq_analysis_df[(sq_analysis_df.pred_match == 1) & (sq_analysis_df.ground_truth == 0)])
    sq_tn = len(sq_analysis_df[(sq_analysis_df.pred_match == 0) & (sq_analysis_df.ground_truth == 0)])
    sq_fn = len(sq_analysis_df[(sq_analysis_df.pred_match == 0) & (sq_analysis_df.ground_truth == 1)])
    if sq_fn:
        logger.warning(f"match for project #{list(sq_analysis_df[(sq_analysis_df.pred_match == 0) & (sq_analysis_df.ground_truth == 1)]['job_number'])} was not detected.")
    logger.info(f"true postives: {sq_tp}")
    logger.info(f"false postives: {sq_fp}")
    logger.info(f"true negatives: {sq_tn}")
    logger.info(f"false negatives: {sq_fn}")
    sq_recall = sq_tp / (sq_tp + sq_fn)
    sq_precision = sq_tp / (sq_tp + sq_fp)
    logger.info(f"recall: {sq_recall}")
    logger.info(f"precision: {sq_precision}")
    sq_min_prob = min(sq_analysis_df[sq_analysis_df.ground_truth == 1.0]['pred_prob'])
    logger.info(f"minimum probability threshhold to acheive 100% recall: {sq_min_prob}")
    sq_analysis_df['adj_pred_match'] = sq_analysis_df.pred_prob.apply(lambda x: x >= sq_min_prob)
    sq_avg_prob = mean(sq_analysis_df[sq_analysis_df.ground_truth == 1.0]['pred_prob'])
    logger.debug(sq_analysis_df[sq_analysis_df.adj_pred_match])
    update_results({
        "probability threshold": prob_thresh,
        "SMOTE": load_config()["machine_learning"]["use_smote"],
        "100% recall acheived" : True if int(recall) == 1 else False,
        'minimum probability required for status quo model' : sq_min_prob,
        'minimum probability required for new model' : min_prob,
        'average probability required for status quo model' : sq_avg_prob,
        'average probability required for new model' : avg_prob,
        'false positives with status quo' : sq_fp,
        'false positives with new' : fp,
        'precision': precision,
    })
    if recall < 1.0:
        logger.warning(
            "100% recall not achieved with new model - archiving it "
            "and maintaining status quo!"
        )
        if test:
            logger.info("skipping files transfers because running in test mode")
        else:
            for artifact in ["model", "features"]:
                os.rename(
                    f"new_rf_{artifact}.pkl",
                    f"model_archive/rf_new_{artifact}-{datetime.datetime.now().date()}.pkl",
                )
    else:
        logger.info("100% recall acheived! Adopting new model and archiving old one.")
        if test:
            logger.info("skipping files transfers because running in test mode")
        else:
            for artifact in ["model", "features"]:
                os.rename(
                    f"rf_{artifact}.pkl",
                    f"model_archive/rf_{artifact}-{datetime.datetime.now().date()}.pkl",
                )
                os.rename(f"new_rf_{artifact}.pkl", f"rf_{artifact}.pkl")
        for metric, new, sq in zip(
            ("false positive(s)", "max threshold", "average prediction probability"),
            (fp, min_prob, avg_prob),
            (sq_fp, sq_min_prob, sq_avg_prob),
        ):
            if metric == "false positive(s)":
                if new <= sq:
                    good_outcome = True
                else:
                    good_outcome = False
            elif new >= sq:
                good_outcome = True
            else:
                good_outcome = False
            if good_outcome:
                logger.info(
                    f"New model produced {new} {metric}, "
                    f"which is better or equal to status quo of {sq}."
                )
            else:
                logger.warning(
                    f"Might want to investigate new model - new model produced "
                    f"{new} {metric}, compared to status quo of {sq}"
                )
Example #33
 def do(row):
     a, b = row
     if a[1] and b[1]:
         if match(a[1], b[1]) > 0:
             return a[0], b[0]
Example #34
    def test_truth_table(self):
        build_train_set()
        train_model(prob_thresh=prob_thresh)
        match_query = """
            SELECT
                company_projects.*,
                web_certificates.url_key
            FROM 
                web_certificates
            LEFT JOIN
                attempted_matches
            ON
                web_certificates.cert_id = attempted_matches.cert_id
            LEFT JOIN
                company_projects
            ON
                attempted_matches.project_id = company_projects.project_id
            LEFT JOIN
                base_urls
            ON
                base_urls.source = web_certificates.source
            WHERE 
                company_projects.closed=1
            AND
                attempted_matches.ground_truth=1
            AND 
                attempted_matches.multi_phase=0
            AND 
                attempted_matches.validate=0
        """
        corr_web_certs_query = """
            SELECT
                web_certificates.*
            FROM 
                web_certificates
            LEFT JOIN
                attempted_matches
            ON
                web_certificates.cert_id = attempted_matches.cert_id
            LEFT JOIN
                company_projects
            ON
                attempted_matches.project_id = company_projects.project_id
            LEFT JOIN
                base_urls
            ON
                base_urls.source = web_certificates.source
            WHERE 
                company_projects.closed=1
            AND
                attempted_matches.ground_truth=1
            AND 
                attempted_matches.multi_phase=0
            AND 
                attempted_matches.validate=0
        """

        with create_connection() as conn:
            test_company_projects = pd.read_sql(match_query, conn)
            test_web_df = pd.read_sql(corr_web_certs_query, conn)
        test_web_df = wrangle(test_web_df)
        results = match(
            company_projects=test_company_projects,
            df_web=test_web_df,
            test=True,
            prob_thresh=prob_thresh,
            version="new",
        )

        # confirm 100% recall with the assert below
        qty_actual_matches = int(len(results)**0.5)
        qty_found_matches = results[results.pred_match == 1].title.nunique()
        self.assertTrue(
            qty_found_matches == qty_actual_matches,
            msg=
            f"qty_found_matches({qty_found_matches}) not equal qty_actual_matches({qty_actual_matches})",
        )

        # make sure there are no more than 25% false positives with the assert below
        false_positives = len(
            results[results.pred_match == 1]) - qty_found_matches
        self.assertTrue(
            false_positives <= round(qty_actual_matches * 0.25, 1),
            msg=
            f"found too many false positives ({false_positives}) out of total test projects ({qty_actual_matches})",
        )

        # test single sample
        sample_company = pd.DataFrame(
            {
                "cert_id": "99999",
                "project_id": "99999",
                "job_number": "2387",
                "city": "Ottawa",
                "address": "2562 Del Zotto Ave., Ottawa, Ontario",
                "title": "DWS Building Expansion",
                "owner": "Douglas Stalker",
                "contractor": "GNC",
                "engineer": "Goodkey",
                "address_lat": 45.312234,
                "address_lng": -75.623789,
                "receiver_emails_dump": "{'alex': '*****@*****.**'}",
                "closed": "0",
            },
            index=range(1),
        )
        sample_web = pd.DataFrame(
            {
                "cert_id": "99998",
                "pub_date": "2019-03-06",
                "city": "Ottawa-Carleton",
                "address": "2562 Del Zotto Avenue, Gloucester, Ontario",
                "title":
                "Construct a 1 storey storage addition to a 2 storey office/industrial building",
                "owner": "Doug Stalker, DWS Roofing",
                "contractor": "GNC Constructors Inc.",
                "engineer": None,
                "address_lat": 45.312234,
                "address_lng": -75.623789,
                "url_key": "B0046A36-3F1C-11E9-9A87-005056AA6F02",
                "source": "dcn",
            },
            index=range(1),
        )
        is_match, prob = match(
            company_projects=sample_company,
            df_web=sample_web,
            test=True,
            version="new").iloc[0][["pred_match", "pred_prob"]]
        self.assertTrue(
            is_match,
            msg=
            f"Project #{sample_company.job_number} did not match successfully. Match probability returned was {prob}.",
        )

        # test the same sample but using db retrieval
        results = match(
            company_projects=sample_company,
            since="2019-03-05",
            until="2019-03-07",
            test=True,
            version="new",
        )
        prob_from_db_cert = (
            results[results.contractor == "gnc"].iloc[0].pred_prob
        )  #'gnc' is what is returned from the wrangling funcs
        self.assertTrue(round(prob, 2) == round(prob_from_db_cert, 2))

        # make sure validation runs
        validate_model(prob_thresh=prob_thresh, test=True)
Example #35
def match_results(content_regex, object_list):
    matched_results = []
    for list_member in object_list:
        matched_results.append(matcher.match(content_regex, list_member))
    return matched_results
Example #36
def get_opbot_matches(dataset, model_pairs=None):
    # define model pairs: model1--model2
    # get each model's activities
    # to each model1--model2 pair, add activity pairs: model1_task1--model2_task1, etc.
    models = dataset  # assume the models are loaded already
    if model_pairs is None:
        model_pairs = get_model_pairs(models)

    model_activity = {}  # model_id:{node_id:node}
    for model_name in models.keys():
        model_activity[model_name] = cf.get_hashable_nodes(
            models[model_name], active_components)

    final_matches = dict()
    for pair in model_pairs:
        final_matches[pair] = []

    # make a copy of the node keys; keys of nodes that get filtered out are removed later
    nodes_keys = {}
    for pair in model_pairs:
        nodes_keys[pair] = {}
        nodes_keys[pair][pair.model1] = list(model_activity[pair.model1].keys())
        nodes_keys[pair][pair.model2] = list(model_activity[pair.model2].keys())

    # create a list of all tokenised labels from all models,
    # used to build the co-occurrence dictionary
    all_labels = []
    for model_id in models.keys():
        for node_id in model_activity[model_id].keys():
            all_labels.append(
                cf.get_tokens_without_stop_words(
                    model_activity[model_id][node_id]['node_name'].lower()))

    all_words_set = set()  # used later to build the co-occurrence dict

    for m_pair in model_pairs:
        model1 = m_pair.model1
        model2 = m_pair.model2

        for node1 in model_activity[model1].keys():
            tokens = cf.get_tokens_without_stop_words(
                model_activity[model1][node1]['node_name'])

            for node2 in model_activity[model2].keys():

                for a_word in tokens + cf.get_tokens_without_stop_words(
                        model_activity[model2][node2]['node_name']):
                    all_words_set.add(a_word.lower())
                # Filtering step
                if model_activity[model1][node1][
                        'node_name'] == model_activity[model2][node2][
                            'node_name']:
                    # nodes with identical labels are matched
                    final_matches[m_pair].append(
                        matcher.match(model_activity[model1][node1],
                                      model_activity[model2][node2], 1.0))

                    if node1 in nodes_keys[m_pair][model1]:
                        nodes_keys[m_pair][model1].remove(node1)
                    if node2 in nodes_keys[m_pair][model2]:
                        nodes_keys[m_pair][model2].remove(node2)

    # normalise labels for the remaining nodes
    # create variable {model: {node_id: normalised_label}}
    labels = {}
    for model_name in models:
        labels[model_name] = {}
        for node in model_activity[model_name].keys():
            labels[model_name][node] = cf.stemming(
                cf.get_tokens_without_stop_words(
                    model_activity[model_name][node]['node_name'].lower()))

    # Extract activity pairs
    activity_pairs_all = []
    for pair in model_pairs:
        nodes1_keys = nodes_keys[pair][pair.model1]
        nodes2_keys = nodes_keys[pair][pair.model2]
        for node1 in nodes1_keys:
            for node2 in nodes2_keys:
                graph1 = cf.get_graph_with_id_nodes(pair.bpmn1)
                graph2 = cf.get_graph_with_id_nodes(pair.bpmn2)
                n1 = model_activity[pair.model1][node1]
                n2 = model_activity[pair.model2][node2]
                tokens1 = cf.get_tokens_without_stop_words(
                    model_activity[pair.model1][node1]['node_name'].lower())
                tokens2 = cf.get_tokens_without_stop_words(
                    model_activity[pair.model2][node2]['node_name'].lower())
                st1 = cf.get_current_to_start_node(node=n1,
                                                   graph=graph1,
                                                   diagram=pair.bpmn1)
                st2 = cf.get_current_to_start_node(node=n2,
                                                   graph=graph2,
                                                   diagram=pair.bpmn2)
                new_activity_pair = activity_pair(
                    pair=pair,
                    node1=node1,
                    label1=labels[pair.model1][node1],
                    st1=st1,
                    tokens1=tokens1,
                    node2=node2,
                    label2=labels[pair.model2][node2],
                    st2=st2,
                    tokens2=tokens2)
                activity_pairs_all.append(new_activity_pair)
                pair.activities_pairs.append(new_activity_pair)

    coocurance = cco.get_coocccurance_dict(all_words_set, all_labels)
    cco_matcher = cco.cco_occurance_similarity_calculator(coocurance, 2)

    # calculate similarities
    bots_results = {}
    bots_results['B1'] = get_BOT_results(activity_pairs_all, models, 'LIN',
                                         False)
    bots_results['B2'] = get_BOT_results(activity_pairs_all, models, 'LIN',
                                         True)
    bots_results['B3'] = get_BOT_results(activity_pairs_all, models, 'LEV',
                                         False)
    bots_results['B4'] = get_BOT_results(activity_pairs_all, models, 'LEV',
                                         True)
    bots_results['B5'] = get_BOT_results(activity_pairs_all, models, 'CCO',
                                         False, cco_matcher)
    bots_results['B6'] = get_BOT_results(activity_pairs_all, models, 'CCO',
                                         True, cco_matcher)

    # determine thresholds
    bot_thresholds = {}
    bot_thresholds['B1'] = determine_thresholds(bot_results=bots_results['B1'],
                                                models=model_pairs,
                                                pre_t_min=0.6,
                                                pre_t_max=1)
    bot_thresholds['B2'] = determine_thresholds(bot_results=bots_results['B2'],
                                                models=model_pairs,
                                                pre_t_min=0.6,
                                                pre_t_max=1)
    bot_thresholds['B3'] = determine_thresholds(bot_results=bots_results['B3'],
                                                models=model_pairs,
                                                pre_t_min=0.6,
                                                pre_t_max=1)
    bot_thresholds['B4'] = determine_thresholds(bot_results=bots_results['B4'],
                                                models=model_pairs,
                                                pre_t_min=0.6,
                                                pre_t_max=1)
    bot_thresholds['B5'] = determine_thresholds(bot_results=bots_results['B5'],
                                                models=model_pairs,
                                                pre_t_min=0.7,
                                                pre_t_max=1)
    bot_thresholds['B6'] = determine_thresholds(bot_results=bots_results['B6'],
                                                models=model_pairs,
                                                pre_t_min=0.7,
                                                pre_t_max=1)
    # rank bot results
    best, second_best = get_two_best_thresholds(bot_thresholds, bots_results)
    # determine alignments
    f_matches = get_matches_by_combine(activity_pairs_all, bots_results,
                                       bot_thresholds, best, second_best,
                                       model_pairs)
    # return
    to_return = {}
    for k in final_matches.keys():
        to_return[k] = final_matches[k] + f_matches[k]
    # it returns {bpmn_models_pair:list_of_matches}
    return to_return
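
A hypothetical way to consume the returned {model_pair: matches} mapping, assuming dataset is the {model_name: bpmn_model} dict the function expects:

all_matches = get_opbot_matches(dataset)
for pair, matches in all_matches.items():
    print(pair.model1, "vs", pair.model2, "->", len(matches), "matches")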
Example #37
def add(text):
    data = []
    textlist = text.split(" ")
    # extract the keyword type (jenis), course (matkul), and task name
    for kata in bd.getList_Kata_Penting():
        for i in range(len(textlist)):
            index = matcher.match(textlist[i].lower(), kata.lower())
            if (index):
                data.append(textlist[i + 1])
                data.append(str(textlist[i]).lower())
                nama = textlist[i + 2]

                pada_tgl = ["pada", "tanggal"]
                for kata2 in pada_tgl:
                    for k in range(len(textlist[i + 2 + 1:])):
                        index2 = matcher.match(textlist[i + 2 + k].lower(),
                                               kata2.lower())
                        if (index2):
                            for j in range(k - 1):
                                nama += " " + textlist[i + 2 + j + 1]
                            break

                    if (index2):
                        break

                data.append(nama)

                break
        if (index):
            break

    # task deadline date
    for kata in bd.getList_Kata_Tampil_Deadline():
        for i in range(len(textlist)):
            index = matcher.match(textlist[i].lower(), kata.lower())
            if (index):
                if (len(textlist[i + 1]) > 2):
                    data.insert(0, textlist[i + 1])

                else:
                    bulan_int = bulan.get(textlist[i + 2].lower())
                    tanggal = textlist[i + 1] + "/" + bulan_int + "/" + textlist[i + 3]
                    data.insert(0, tanggal)
                break
        if (index):
            break

    if (len(data) == 4):
        (tgl, bln, th) = re.split("/", data[0])
        date = datetime.date(int(th), int(bln), int(tgl))
        N = len(bd.getList_Daftar_Tugas()) + 1
        if (bd.IsInputValid(date, data[1], data[2], data[3], False)):
            bd.upsert_Daftar_Tugas(N, date, data[1], data[2], data[3], False)
            output = "[ ===== Berhasil Ditambahkan =====]<br>"
            output += "(ID: " + str(N) + ") " + str(
                date) + " " + data[1] + " " + data[2] + " " + data[3] + "<br>"
            return output
        else:
            return "Data yang sama telah digunakan"
    else:
        return "-1"
Example #38
def ValidasiInput(text):
    data = daftar_katakunci(text)
    if (matcher.match(str(text).lower(), "undur")
            or matcher.match(str(text).lower(), "ubah")):
        if ("pada" not in data):
            return """Gunakan kata "pada" sebelum tanggal"""
        else:
            return diundurTask(text)

    else:

        if (len(data) <= 2 and "antara" not in data
                and "depan" not in data and "deadline" not in data):
            if (len(data) == 1):
                if ("tanggal" not in data and "pada" not in data):
                    return """Gunakan kata ["tangal", "pada"] sebelum tanggal"""
                else:
                    return """Gunakan kata ["tubes", "tucil", "kuis", "ujian", "pr"]"""
            else:
                return add(text)

        elif ("antara" in data):
            if ("tubes" in data):
                return antaraTanggal_Jenis(text, "tubes")
            elif ("tucil" in data):
                return antaraTanggal_Jenis(text, "tucil")
            elif ("kuis" in data):
                return antaraTanggal_Jenis(text, "kuis")
            elif ("ujian" in data):
                return antaraTanggal_Jenis(text, "ujian")
            elif ("pr" in data):
                return antaraTanggal_Jenis(text, "pr")
            else:
                return antaraTanggal(text)

        elif ("depan" in data):

            if ("hari" in data):
                if ("tubes" in data):
                    return haritask_Jenis(text, "tubes")
                elif ("tucil" in data):
                    return haritask_Jenis(text, "tucil")
                elif ("kuis" in data):
                    return haritask_Jenis(text, "kuis")
                elif ("ujian" in data):
                    return haritask_Jenis(text, "ujian")
                elif ("pr" in data):
                    return haritask_Jenis(text, "pr")
                else:
                    return haritask(text)
            elif ("minggu" in data):
                if ("tubes" in data):
                    return minggutask_Jenis(text, "tubes")
                elif ("tucil" in data):
                    return minggutask_Jenis(text, "tucil")
                elif ("kuis" in data):
                    return minggutask_Jenis(text, "kuis")
                elif ("ujian" in data):
                    return minggutask_Jenis(text, "ujian")
                elif ("pr" in data):
                    return minggutask_Jenis(text, "pr")
                else:
                    return minggutask(text)
            else:
                return "-1"

        elif ("deadline" in data):
            if ("hari" in data):
                if ("tubes" in data):
                    return hariIni_Jenis("tubes")
                elif ("tucil" in data):
                    return hariIni_Jenis("tucil")
                elif ("kuis" in data):
                    return hariIni_Jenis("kuis")
                elif ("ujian" in data):
                    return hariIni_Jenis("ujian")
                elif ("pr" in data):
                    return hariIni_Jenis("pr")
                else:
                    return hariIni()

            elif ("sejauh" in data):
                if ("tubes" in data):
                    return sejauhIni_Jenis("tubes")
                elif ("tucil" in data):
                    return sejauhIni_Jenis("tucil")
                elif ("kuis" in data):
                    return sejauhIni_Jenis("kuis")
                elif ("ujian" in data):
                    return sejauhIni_Jenis("ujian")
                elif ("pr" in data):
                    return sejauhIni_Jenis("pr")
                else:
                    return sejauhIni()
            else:
                return "-1"
        else:
            return "-1"