def upload_file():
   if request.method == 'POST':
      f = request.files['zipfile']
      # save under the sanitized filename and reuse that name when extracting
      saved_name = secure_filename(f.filename)
      f.save(saved_name)
      #job_desc=request.form.get("jobdesc")
      #num_rank=request.form.get("rank")
      #req_details=[job_desc,num_rank]
      jd_skills=request.form.get("skills")
      c_sep_skills=jd_skills.split(',')
      final_skills=[]
      for skill in c_sep_skills:
         skill=skill.strip()
         skill=skill.replace(" " ,"_")
         final_skills.append(skill)
      
      reader=PDFReader()
      allresumes=reader.extract_resumes(saved_name)
      resumes_scores = {}
      # Initialize the header row by writing it into the CSV; later rows are appended.
      # Row columns: id, filename, name, score, rank
      with open('cv_list.csv', 'w', newline='') as csvFile:
         writer = csv.writer(csvFile)
         writer.writerow(["ID", "filename", "Name", "Score", "Rank"])

      for index in range(len(allresumes[0])):
         
         analysed_list=reader.analyze_resume(allresumes[0][index], allresumes[1][index], index + 1, final_skills)
         resume_score = analysed_list[0]
         name=analysed_list[1]
         resumes_scores[allresumes[1][index]] = resume_score
         
         row=[index+1,allresumes[1][index],name,resume_score]
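         # The Rank column from the header is not written here; presumably Ranker.rank_csv() fills it in afterwards.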
         
         with open('cv_list.csv', 'a', newline='') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow(row)
      
      ranker=Ranker()
      ranked_list=ranker.rank_csv()


      return render_template('printresult.html', scores=resumes_scores, title="View Scores",skills=final_skills, ranked_list=ranked_list)
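
A minimal standalone sketch (not part of the original snippet) of the skill normalization performed in upload_file() above; the helper name normalize_skills is hypothetical:

def normalize_skills(jd_skills: str) -> list:
    """Split a comma-separated skills string, trim each entry, and replace inner spaces with underscores."""
    final_skills = []
    for skill in jd_skills.split(','):
        skill = skill.strip().replace(" ", "_")
        final_skills.append(skill)
    return final_skills

# normalize_skills("machine learning, data analysis , SQL")
# -> ['machine_learning', 'data_analysis', 'SQL']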
	def mutate(cls, _, args, context, info):
	
		query = Learners.get_query(context)
		learner = query.filter(LearnerModel.email == args.get('email')).first()

		if learner is None:
			learner = LearnerModel(email=args.get('email'), 
		                     timestamp=args.get('ts'),
		                     role=args.get('role'),
		                     availability=args.get('availability'),
		                     org_level=args.get('org_level'),
		                     org=args.get('org'),
		                     interest=args.get('interest'),
		                     change_track=args.get('change_track'),
		                     outside_org=args.get('outside_org'),
		                     requests=args.get('requests'),
		                     identify_as=args.get('identify_as'),
		                     fullname=args.get('full_name'),
		                     manager_email=args.get('manager_email'),
		                     #Sent_Wecome_Email__date_=args.get('welcome_email_date'),
		                     #Sent_Manager_Approval_Email__date_=args.get('manager_approval_email_date'),
		                     #Manager_Approved__date_=args.get('manager_approved_date'),
		                     #Mentee_Limit=args.get('mentee_limit'),
		                     #Gender=args.get('gender'),
		                     )
			db_session.add(learner)
		else:
			learner.timestamp=args.get('ts')
			learner.role=args.get('role')
			learner.availability=args.get('availability')
			learner.org_level=args.get('org_level')
			learner.org=args.get('org')
			learner.interest=args.get('interest')
			learner.change_track=args.get('change_track')
			learner.outside_org=args.get('outside_org')
			learner.requests=args.get('requests')
			learner.identify_as=args.get('identify_as')
			learner.fullname=args.get('full_name')
			learner.manager_email=args.get('manager_email')
		db_session.commit()
		
		# on successful commit, compute ranking and update or overwrite in rankings table
		rankings = LearnerMentorRankingsModel(learner=args.get('email'), 
		                     mentor='testmentor2',
		                     ranking=NaiveRanker.rank_mentors(args.get('email')))
		db_session.add(rankings)
		db_session.commit()
		
		# return top 3 rankings for learner
		
		ok = True
		return createOrUpdateLearner(learner=learner, rankings=rankings, ok=ok)
Example #3
    def __init__(self, path_weight: str, path_data: str, similarity,
                 path_feat: str):
        self.path_weight = path_weight
        self.path_data = path_data
        self.similarity = similarity
        self.flickr_dataset = ImageFlickrFeatures(
            path_feat)  #dbs/features_contrastive.db
        # self.ranking = ranking

        imagenet_net = ResNet34()
        sketches_net = ResNet34()

        # print("Adapting output layers...")

        siamese_net = SiameseNetwork(sketches_net, imagenet_net)
        siamese_net.load_state_dict(
            torch.load(self.path_weight)
        )  # r'C:\Users\aleja\Desktop\Tareas\Reconocimiento Virtual con Deep Learning\T2\best_SiameseNetwork_contrastive.pth'
        self.net = siamese_net
        self.ranking = Ranker(self.path_data,
                              image_dataset_features=self.flickr_dataset,
                              feature_extractor=self.net,
                              similarity_fn=self.similarity)
	def mutate(cls, _, args, context, info):
		query = Mentors.get_query(context)
		mentor = query.filter(MentorModel.email == args.get('email')).first()

		if mentor is None:
			mentor = MentorModel(email=args.get('email'), 
		                     timestamp=args.get('ts'),
		                     role=args.get('role'),
		                     availability=args.get('availability'),
		                     org_level=args.get('org_level'),
		                     org=args.get('org'),
		                     expertise=args.get('expertise'),
		                     outside_org=args.get('outside_org'),
		                     requests=args.get('requests'),
		                     identify_as=args.get('identify_as'),
		                     fullname=args.get('full_name'),
		                     manager_email=args.get('manager_email'),
		                     #Sent_Wecome_Email__date_=args.get('welcome_email_date'),
		                     #Sent_Manager_Approval_Email__date_=args.get('manager_approval_email_date'),
		                     #Manager_Approved__date_=args.get('manager_approved_date'),
		                     #Mentee_Limit=args.get('mentee_limit'),
		                     #Gender=args.get('gender'),
		                     )
			db_session.add(mentor)
		else:
			mentor.timestamp=args.get('ts')
			mentor.role=args.get('role')
			mentor.availability=args.get('availability')
			mentor.org_level=args.get('org_level')
			mentor.org=args.get('org')
			mentor.expertise=args.get('expertise')
			mentor.outside_org=args.get('outside_org')
			mentor.requests=args.get('requests')
			mentor.identify_as=args.get('identify_as')
			mentor.fullname=args.get('full_name')
			mentor.manager_email=args.get('manager_email')

		db_session.commit()
		
		# on successful commit, compute ranking and update or overwrite in rankings table
		rankings = MentorLearnerRankingsModel(mentor=args.get('email'), 
		                     learner='testlearner2',
		                     ranking=NaiveRanker.rank_learners(args.get('email')))
		db_session.add(rankings)
		db_session.commit()
		
		ok = True
		return createOrUpdateMentor(mentor=mentor, rankings=rankings, ok=ok)
Example #5
    def evaluate(self, query: str, options: dict, ranker: Ranker,
                 callback: Callable[[dict], Any]) -> None:
        """
        Evaluates the given query, doing N-out-of-M ranked retrieval. I.e., for a supplied query having M terms,
        a document is considered to be a match if it contains at least N <= M of those terms.

        The matching documents are ranked by the supplied ranker, and only the "best" matches are returned to the
        client via the supplied callback function.

        The client can supply a dictionary of options that controls this query evaluation process: The value of
        N is inferred from the query via the "recall_threshold" (float) option, and the maximum number of documents
        to return to the client is controlled via the "hit_count" (int) option.
        """
        # create one posting-list iterator per term in the query
        query = self._inverted_index.get_terms(query)
        qcounter = Counter(query)
        unique_terms = tuple(qcounter.keys())
        multi = tuple(qcounter.values())
        sieve = Sieve(options["hit_count"])
        iter_list = []
        query2 = []
        for t in unique_terms:
            query2.append(t)
            list_iter = self._inverted_index.get_postings_iterator(t)
            iter_list.append(list_iter)
        # compute how many of the query terms must appear in the document
        recall_threshold = options["recall_threshold"]
        min_treff = max(1.0, math.floor(recall_threshold * len(iter_list)))
        # build a list of the first element from each iterator
        peek = []
        for n in range(len(iter_list)):
            peek.append(next(iter_list[n], None))
        antall_iter = len(iter_list)

        def finn_minste():
            # find the posting in peek with the lowest document id
            minste = peek[0]
            i = 1
            while minste is None:
                minste = peek[i]
                i += 1
                if i >= len(peek):
                    break
            for i in range(len(peek)):
                if peek[i] is not None:
                    if minste.document_id > peek[i].document_id:
                        minste = peek[i]
            return minste

        while antall_iter >= min_treff:
            min_doc = finn_minste()
            if min_doc is None:
                break
            # check whether the recall threshold is met for the lowest docID
            ranker.reset(min_doc.document_id)
            antall_treff = 0
            for n in range(len(peek)):
                if peek[n] is not None:
                    if peek[n].document_id == min_doc.document_id:
                        antall_treff += 1
                        ranker.update(unique_terms[n], multi[n], peek[n])
            # evaluate the ranking and toss the score into the sieve
            if antall_treff >= min_treff:
                sieve.sift(ranker.evaluate(), min_doc.document_id)
            # advance past this document in the posting lists
            for n in range(len(iter_list)):
                if peek[n] is not None:
                    if peek[n].document_id == min_doc.document_id:
                        peek[n] = next(iter_list[n], None)
                        if peek[n] is None:
                            antall_iter -= 1
            # rinse and repeat
        iteren = sieve.winners()
        lista = []
        for f in iteren:
            lista.append(f)
            callback({
                "score": int(f[0]),
                "document": self._corpus.get_document(f[1])
            })
Example #6
    def evaluate(self, query: str, options: dict, ranker: Ranker,
                 callback: Callable[[dict], Any]) -> None:
        """
        Evaluates the given query, doing N-out-of-M ranked retrieval. I.e., for a supplied query having M terms,
        a document is considered to be a match if it contains at least N <= M of those terms.

        The matching documents are ranked by the supplied ranker, and only the "best" matches are returned to the
        client via the supplied callback function.

        The client can supply a dictionary of options that controls this query evaluation process: The value of
        N is inferred from the query via the "match_threshold" (float) option, and the maximum number of documents
        to return to the client is controlled via the "hit_count" (int) option.

        The callback function supplied by the client will receive a dictionary having the keys "score" (float) and
        "document" (Document).
        """

        # Print verbose debug information?
        debug = options.get("debug", False)

        # Produce the query terms. We must use the same string processing here as we used when
        # building up the inverted index. Some terms might be duplicated (e.g., as in the query
        # "to be or not to be").
        query_terms = self._inverted_index.get_terms(query)
        unique_query_terms = [
            (term, count) for (term, count) in Counter(query_terms).items()
        ]

        # Get the posting lists for the unique query terms.
        posting_lists = [
            self._inverted_index[term] for (term, _) in unique_query_terms
        ]

        # We require that at least N of the M query terms are present in the document,
        # for the document to be considered part of the result set. What should the minimum
        # value of N be?
        # TODO: Take multiplicity into account, and not just uniqueness.
        match_threshold = max(0.0, min(1.0,
                                       options.get("match_threshold", 0.5)))
        required_minimum = max(
            1,
            min(len(unique_query_terms),
                int(match_threshold * len(unique_query_terms))))

        # When traversing the posting lists using document-at-a-time traversal, we need to keep track
        # of where we are in each of the posting lists. Initially, all the cursors "point to" the first entry
        # in each posting list. Keep track of which posting lists that remain to be fully traversed.
        all_cursors = [next(p, None) for p in posting_lists]
        remaining_cursor_ids = [
            i for i in range(len(all_cursors)) if all_cursors[i]
        ]

        # We're doing ranked retrieval. Assess relevance scores per document as we go along, as we're doing
        # document-at-a-time traversal. Keep track of the K highest-scoring documents.
        sieve = Sieve(max(1, min(100, options.get("hit_count", 10))))

        # We're doing at least N-of-M matching. As we reach the end of the posting lists, we can abort when
        # the number of non-exhausted lists drops below the required minimum N.
        while len(remaining_cursor_ids) >= required_minimum:

            # The posting lists are sorted by the document identifiers in ascending order. Define the
            # "frontier" as the subset of non-exhausted posting lists that mention the lowest document
            # identifier. In a sense, if we imagine scanning the posting lists from left to right, the
            # frontier is the subset that has the "leftmost" cursors.
            # TODO: This can easily be done in a single pass over the remaining lists.
            document_id = min(
                [all_cursors[i].document_id for i in remaining_cursor_ids])
            frontier_cursor_ids = [
                i for i in remaining_cursor_ids
                if all_cursors[i].document_id == document_id
            ]

            # The number of elements on the "frontier" needs to be at least N. Otherwise, these documents
            # don't contain enough of the query terms, and aren't part of the result set.
            if len(frontier_cursor_ids) >= required_minimum:
                ranker.reset(document_id)
                for i in frontier_cursor_ids:
                    ranker.update(unique_query_terms[i][0],
                                  unique_query_terms[i][1], all_cursors[i])
                score = ranker.evaluate()
                sieve.sift(score, document_id)
                if debug:
                    print("*** MATCH")
                    print("document =", self._corpus[document_id])
                    print(
                        "matches  =", {
                            unique_query_terms[i][0]: all_cursors[i]
                            for i in frontier_cursor_ids
                        })
                    print("score    =", score)

            # Move along the cursors on the frontier. The cursors not on the frontier remain where they
            # are. We may or may not reach the end of some posting lists when we advance, so the set of
            # remaining non-exhausted lists might shrink.
            for i in frontier_cursor_ids:
                all_cursors[i] = next(posting_lists[i], None)
            remaining_cursor_ids = [
                i for i in range(len(all_cursors)) if all_cursors[i]
            ]

        # Alert the client about the best-matching documents, using the supplied callback function.
        # Emit documents sorted according to their relevancy scores.
        for (score, document_id) in sieve.winners():
            callback({"score": score, "document": self._corpus[document_id]})
Example #7
class Evaluator:
    def __init__(self, path_weight: str, path_data: str, similarity,
                 path_feat: str):
        self.path_weight = path_weight
        self.path_data = path_data
        self.similarity = similarity
        self.flickr_dataset = ImageFlickrFeatures(
            path_feat)  #dbs/features_contrastive.db
        # self.ranking = ranking

        imagenet_net = ResNet34()
        sketches_net = ResNet34()

        # print("Adapting output layers...")

        siamese_net = SiameseNetwork(sketches_net, imagenet_net)
        siamese_net.load_state_dict(
            torch.load(self.path_weight)
        )  # r'C:\Users\aleja\Desktop\Tareas\Reconocimiento Virtual con Deep Learning\T2\best_SiameseNetwork_contrastive.pth'
        self.net = siamese_net
        self.ranking = Ranker(self.path_data,
                              image_dataset_features=self.flickr_dataset,
                              feature_extractor=self.net,
                              similarity_fn=self.similarity)

    def calc_rank(self, path_img):
        rank = self.ranking.get_rank(path_img)
        return rank

    def calc_all_ranks(self, path_querys):
        self.imgs_names = listdir(path_querys)
        for i in range(len(self.imgs_names)):
            self.imgs_names[i] = path_querys + '/' + self.imgs_names[i]

        self.classes = []
        self.ranks = []
        for i in range(len(self.imgs_names)):
            c, rank = self.ranking.get_rank(self.imgs_names[i])
            self.classes.append(c)
            self.ranks.append(rank)

    def calc_map(self):
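        # Note: 'map' below presumably refers to a mean-average-precision helper
        # imported elsewhere in the project, not Python's builtin map().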
        mean_ap = map(self.classes, self.ranks)
        return mean_ap

    def calc_recall_ratio(self, len_class_path: str):
        x, y = recall_ratio_tot(self.classes, self.ranks, len_class_path)
        plt.plot(x, y)
        plt.xlabel('Recall')
        plt.ylabel('Retrieved images')
        plt.title('Recall ratio Curve')
        return x, y

    def calc_recall_prec(self, len_class_path: str):
        rp = recall_prec_tot(self.classes, self.ranks, len_class_path)
        rec = np.array([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
        plt.plot(rec, rp)
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Recall-Precision Curve')
        return rp
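
A hedged usage sketch for the Evaluator class above; all argument values are placeholders and cosine_similarity stands in for whatever similarity function the project actually uses, while the call sequence mirrors the methods defined in the class:

# Hypothetical usage of Evaluator; every argument value below is a placeholder.
# evaluator = Evaluator(path_weight="best_SiameseNetwork_contrastive.pth",
#                       path_data="flickr15k/images",
#                       similarity=cosine_similarity,
#                       path_feat="dbs/features_contrastive.db")
# evaluator.calc_all_ranks("sketch_queries")      # rank every query image in the folder
# print("mAP:", evaluator.calc_map())             # mean average precision over the ranked lists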
Example #8
    def evaluate(self, query: str, options: dict, ranker: Ranker, callback: Callable[[dict], Any]) -> None:
        """
        Evaluates the given query, doing N-out-of-M ranked retrieval. I.e., for a supplied query having M terms,
        a document is considered to be a match if it contains at least N <= M of those terms.

        The matching documents are ranked by the supplied ranker, and only the "best" matches are returned to the
        client via the supplied callback function.

        The client can supply a dictionary of options that controls this query evaluation process: The value of
        N is inferred from the query via the "match_threshold" (float) option, and the maximum number of documents
        to return to the client is controlled via the "hit_count" (int) option.

        The callback function supplied by the client will receive a dictionary having the keys "score" (float) and
        "document" (Document).

        if the query contains m unique query terms, each document in the result set should contain at least n of these m terms.
        """
        terms = list(self._inverted_index.get_terms(query))
        threshold = options.get("match_threshold", 0.5)
        debug = options.get("debug", False)
        counter_terms = Counter(terms)
        hit = options.get('hit_count')
        sieve = Sieve(hit)
        m = len(terms)
        n = max(1, min(m, int(threshold * m)))

        class Aktiv(object):
            def __init__(self, invertedindex, term, multiplicity):
                self.term = term
                self.iterator = invertedindex.get_postings_iterator(term)
                self.posting = next(self.iterator, None)
                self.multiplicity = multiplicity
                self.hasBeenRanked = False

            @property
            def document_id(self):
                return self.posting.document_id

            def neste_posting(self):
                self.posting = next(self.iterator, None)

        aktive = []  # list of active posting-list iterators

        for term in terms:
            aktiv = Aktiv(self._inverted_index, term, counter_terms[term])
            if aktiv.posting is not None:
                aktive.append(aktiv)
        forrige_minste = None
        while len(aktive) > 0:
            (minste, index) = min((v.document_id, i) for i, v in enumerate(aktive))
            current = aktive[index]
            if minste != forrige_minste:
                aktive_docids = [a for a in aktive if a.document_id == minste]
                ranker.reset(current.document_id)
                evaluated_terms = []
                # walk through aktive_docids to check each term and its frequency
                for a in aktive_docids:
                    if a.term not in evaluated_terms:
                        ranker.update(a.term, a.multiplicity, a.posting)
                        evaluated_terms.append(a.term)
                score = ranker.evaluate()
                if threshold == 1:
                    if not len(aktive_docids) < n and score >= n:
                        sieve.sift(score, minste)
                else:
                    if score >= n and len(aktive_docids) >= n:
                        sieve.sift(score, minste)
            forrige_minste = minste
            current.neste_posting()
            post = current.posting
            if post is None:
                aktive.pop(index)

        for win in sieve.winners(): # append the winners
            doc = self._corpus.get_document(win[1])
            callback({'score': win[0], 'document': doc})