Ejemplo n.º 1
0
    def on_message(self, message):
        """Score a student text against a teacher text and send back the result.

        ``message`` is decoded as JSON and must yield an object with the keys
        ``'teacher'`` and ``'student'``. Those two values are passed to
        ``utils.similarity`` positionally, and the whole decoded object is
        additionally forwarded as keyword arguments, so any extra keys in the
        payload reach ``utils.similarity`` as options. The returned value is
        serialised to JSON and handed to ``self.send``.

        Raises:
            ValueError: if ``message`` is not valid JSON.
            KeyError: if ``'teacher'`` or ``'student'`` is missing.
        """
        # Let logging do the formatting: this avoids calling str() on the raw
        # payload (which raises UnicodeEncodeError for non-ASCII unicode text
        # on Python 2) and skips the work entirely when INFO is disabled.
        logging.info("Received message: %s", message)

        data = json.loads(message)

        teacher = data['teacher']
        student = data['student']

        # NOTE(review): ``data`` still contains 'teacher' and 'student', so
        # they are passed both positionally and as keywords -- confirm that
        # utils.similarity tolerates the extra keyword arguments.
        result = utils.similarity(teacher, student, **data)

        # Anything json cannot encode natively is sent as its str() form.
        self.send(json.dumps(result, default=str))
Ejemplo n.º 2
0
Archivo: bow.py Proyecto: kmwenja/ftm
def main():
    if len(sys.argv) < 3:
        print "Usage: {0} sentence_1 sentence_2 wsd stemmer sim scoring context".format(sys.argv[0])
        return

    sentence_1 = sys.argv[1]
    sentence_2 = sys.argv[2]

    lesk_type = "adapted"

    if len(sys.argv) >= 4:
        lesk_type = sys.argv[3]
    
    stemmer = "lancaster"

    if len(sys.argv) >= 5:
        stemmer = sys.argv[4]

    sim_option = "path"

    if len(sys.argv) >= 6:
        sim_option = sys.argv[5]

    scoring_option = "min"

    if len(sys.argv) >= 7:
        scoring_option = sys.argv[6]

    context = ""

    if len(sys.argv) >= 8:
        context = sys.argv[7]

    result = similarity(
        sentence_1, sentence_2,
        wsd=lesk_type, stemmer=stemmer,
        similarity=sim_option, scoring=scoring_option, context=context)

    print "Score: ", result['score']

    print "Reason:"
    for candidate in result['candidates']:
        print candidate['word1']['token'], \
            "("+str(candidate['word1']['definition'])+")",\
            candidate['match'], \
            candidate['word2']['token'], \
            "("+str(candidate['word2']['definition'])+")"
Ejemplo n.º 3
0
    def mark(self, **kwargs):
        """Match every student answer to its best-scoring marking answer.

        Keyword options (all optional, default ``None``):
            stemmer, wsd, similarity: forwarded unchanged to
                ``utils.similarity``.
            scorer: forwarded to ``utils.similarity`` as ``scoring``.
            tentative_threshold_opt: ``'min'``, ``'mean'`` or ``'max'``;
                selects how the scores obtained by comparing each marking
                answer with itself are reduced to a fallback threshold.
                Any other value gives a fallback of 0.
            threshold_opt: explicit threshold. Used whenever it is truthy,
                so ``None`` and ``0`` both fall back to the tentative one.

        Returns:
            A ``(results, threshold)`` tuple. ``results`` holds one dict per
            student answer with the keys ``'answer'``, ``'marking_answer'``,
            ``'result'``, ``'sent_1_terms'`` and ``'sent_2_terms'``. When no
            marking answer could be matched (for instance because there are
            none), ``'marking_answer'`` and ``'result'`` are ``None`` and
            both term lists are empty.
        """
        stemmer = kwargs.get('stemmer')
        wsd = kwargs.get('wsd')
        sim = kwargs.get('similarity')
        scorer = kwargs.get('scorer')
        tentative_threshold_opt = kwargs.get('tentative_threshold_opt')
        threshold_opt = kwargs.get('threshold_opt')

        marking_answers = self.markinganswer_set.all()

        # The baseline scores only matter when no explicit threshold was
        # given and a known reduction was requested, so skip the similarity
        # calls otherwise.
        tentative_threshold = 0
        if not threshold_opt and tentative_threshold_opt in ('min', 'mean', 'max'):
            baseline_scores = [
                utils.similarity(
                    marking_answer.text, marking_answer.text,
                    stemmer=stemmer, wsd=wsd, similarity=sim,
                    scoring=scorer)['score']
                for marking_answer in marking_answers]

            # With no marking answers the fallback stays at 0 for every
            # mode, instead of leaking a start-value sentinel for 'min'.
            if baseline_scores:
                if tentative_threshold_opt == 'min':
                    tentative_threshold = min(baseline_scores)
                elif tentative_threshold_opt == 'max':
                    tentative_threshold = max(baseline_scores)
                else:
                    tentative_threshold = (
                        sum(baseline_scores) / len(baseline_scores))

        threshold = threshold_opt if threshold_opt else tentative_threshold

        results = []

        for answer in self.studentanswer_set.all():
            best_result = None
            best_score = None
            best_marking_answer = None

            for marking_answer in marking_answers:
                context = self.text + " " + marking_answer.text
                result = utils.similarity(
                    marking_answer.text, answer.text,
                    stemmer=stemmer, wsd=wsd, similarity=sim,
                    scoring=scorer, context=context)
                # Strict comparison: on a tie the earliest marking answer wins.
                if best_result is None or result['score'] > best_score:
                    best_result = result
                    best_score = result['score']
                    best_marking_answer = marking_answer

            # No match at all: report the answer with empty term lists rather
            # than failing on a missing result.
            candidates = [] if best_result is None else best_result['candidates']
            sent_1_terms = [candidate['word1']['token'] for candidate in candidates]
            sent_2_terms = [candidate['word2']['token'] for candidate in candidates]

            results.append(
                {'answer': answer,
                 'marking_answer': best_marking_answer,
                 'result': best_result,
                 'sent_1_terms': sent_1_terms,
                 'sent_2_terms': sent_2_terms})

        return results, threshold