Ejemplo n.º 1
0
    def get(self, request, regex, format=None):
        """Return the books indexed under the term ``regex`` plus suggestions.

        Looks up every ``TermesUrl`` row whose ``terme`` equals ``regex``,
        splits its ``ids`` field on ``";"`` and keeps the ids for which a
        ``BooksUrl`` row exists. The sorted ids are handed to
        ``closeness.closenessCentrality``; the ids it returns are loaded and
        serialized, in that order, into ``resultats``.

        Args:
            request: incoming request (not read here).
            regex: search term taken from the URL; compared for equality.
            format: unused.

        Returns:
            JsonResponse with keys ``"resultats"`` (serialized books) and
            ``"suggestions"`` (output of ``suggestion.getSuggestions``).
        """
        debut = time.time()
        res = []

        termes = TermesUrl.objects.filter(terme=str(regex))
        for terme in termes:
            serializer = TermesUrlSerializer(terme)
            for raw_id in serializer.data['ids'].split(";"):
                try:
                    book = BooksUrl.objects.get(bookID=int(raw_id))
                except (ValueError, BooksUrl.DoesNotExist):
                    # Report the id that failed. The previous handler read the
                    # serializer of an earlier book (or an unbound name when
                    # the very first lookup failed).
                    print("book " + str(raw_id) + " not found")
                    continue
                res.append(BooksUrlSerializer(book).data['bookID'])
        res.sort()
        res_closeness = closeness.closenessCentrality(res)

        resultats = []
        for id_book in res_closeness:
            book = BooksUrl.objects.get(bookID=int(id_book))
            resultats.append(BooksUrlSerializer(book).data)
        suggestions = suggestion.getSuggestions(resultats)
        fin = time.time()
        print("temps: " + str(fin - debut), "nb res: " + str(len(resultats)))
        return JsonResponse(
            {
                "resultats": resultats,
                "suggestions": suggestions
            }, safe=False)
Ejemplo n.º 2
0
def put_catalog_in_db(self):
    """Refresh ``cover`` and ``auteurs`` of every ``BooksUrl`` row.

    For each stored book, builds the path
    ``<CATALOG_RDF_DIR>/<bookID>/pg<bookID>.rdf``, loads it through
    ``util.get_book`` and writes the returned ``'cover'`` and ``'auteurs'``
    values back to the row. Prints ``"Update success"`` or ``"Update fail"``
    per book and logs the id of every book whose id is a positive multiple
    of 500.

    The earlier version also scanned ``CATALOG_RDF_DIR`` into a ``book_ids``
    list that was never read; that scan has been dropped.
    """
    for b in BooksUrl.objects.all():
        serializer = BooksUrlSerializer(b)
        raw_id = serializer.data['bookID']
        book_id = int(raw_id)

        if (book_id > 0) and (book_id % 500 == 0):
            log('%d' % book_id)

        book_path = os.path.join(
            settings.CATALOG_RDF_DIR,
            str(raw_id),
            'pg' + str(raw_id) + '.rdf'
        )
        book = util.get_book(book_id, book_path)
        try:
            # Single UPDATE so a missing key or DB error cannot leave the row
            # with a new cover but stale authors.
            BooksUrl.objects.filter(pk=b.pk).update(
                cover=book['cover'], auteurs=book['auteurs'])
            b.refresh_from_db()
        except Exception:
            # Best-effort per book; KeyboardInterrupt/SystemExit still
            # propagate so the run can be stopped.
            print("Update fail")
        else:
            print("Update success")
Ejemplo n.º 3
0
    def get(self, request, format=None):
        """Return every ``BooksUrl`` row, serialized, as a JSON array.

        ``safe=False`` is required because the payload is a list rather than
        a dict.
        """
        payload = [
            BooksUrlSerializer(livre).data
            for livre in BooksUrl.objects.all()
        ]
        return JsonResponse(payload, safe=False)
Ejemplo n.º 4
0
 def handle(self, *args, **options):
     """Run ``jaccard_distance`` on every pair of books from index 115 on.

     After ``parse_occurences_file`` has run, each book at position >= 115
     in the ``BooksUrl`` queryset is paired with every later book. A pair is
     processed only when both books have at least one term (as returned by
     ``find_terms_in_file``) and their ``bookID`` values differ.
     """
     books = BooksUrl.objects.all()
     self.parse_occurences_file()
     total = len(books)
     for first_idx in range(115, total):
         book1 = BooksUrlSerializer(books[first_idx]).data
         termes_b1 = self.find_terms_in_file(book1)
         if len(termes_b1) <= 0:
             # Nothing to compare against for a book without terms.
             continue
         for second_idx in range(first_idx + 1, total):
             book2 = BooksUrlSerializer(books[second_idx]).data
             termes_b2 = self.find_terms_in_file(book2)
             same_book = book1['bookID'] == book2['bookID']
             if same_book or len(termes_b2) <= 0:
                 continue
             print("---------------------------LIVRES",
                   book1['bookID'], book2['bookID'],
                   "-------------------------")
             self.jaccard_distance(book1, book2, termes_b1, termes_b2)
Ejemplo n.º 5
0
def getSuggestionsForRelevantResults(previousBooks):
    """Collect serialized books that are Jaccard-close to ``previousBooks``.

    For each entry of ``previousBooks`` (a mapping with a ``'bookID'`` key),
    fetches the ``Jaccard`` rows that mention that id on either side, narrows
    them with ``getClosestJaccardEntries`` and resolves them to books with
    ``getBooksFromJaccardEntries``.

    Returns:
        A list of ``BooksUrlSerializer(...).data`` values, one per book
        found, in discovery order (duplicates are not removed).
    """
    neighbours = []
    for current in previousBooks:
        current_id = current['bookID']
        matches = Jaccard.objects.filter(
            Q(x_bookID=current_id) | Q(y_bookID=current_id))
        serialized_matches = JaccardSerializer(matches, many=True).data
        nearest = getClosestJaccardEntries(serialized_matches)
        neighbours.extend(getBooksFromJaccardEntries(nearest, current_id))
    return [BooksUrlSerializer(book).data for book in neighbours]
Ejemplo n.º 6
0
    def get(self, request, regex, format=None):
        """Search all books for ``regex`` using worker threads.

        Loads ``database.csv`` into a dict mapping column 0 to
        ``"<column 2>;<column 3>"``, then starts one thread per batch of 50
        consecutive positions (1-based, inclusive bounds) running
        ``self.printBooks(first, last, regex, dict_from_csv)``. Once every
        thread has finished, ``self.ids`` is sorted in place, passed to
        ``closeness.closenessCentrality``, and the ids it returns are loaded
        and serialized into ``resultats``.

        Args:
            request: incoming request (not read here).
            regex: pattern taken from the URL, forwarded unchanged to
                ``printBooks``.
            format: unused.

        Returns:
            JsonResponse with keys ``"resultats"`` and ``"suggestions"``.
        """
        debut = time.time()
        # NOTE(review): the original executed ``regex.lower()`` here and
        # discarded the result, so the pattern was never lower-cased. The dead
        # call is removed. Assigning the result would also rewrite escape
        # classes such as ``\S`` -> ``\s``; confirm the intent before enabling.

        # newline='' is what the csv module asks for when given a file object.
        with open(settings.DATABASES_DIR + '/database.csv', mode='r',
                  newline='') as inp:
            dict_from_csv = {
                str(rows[0]): str(rows[2] + ";" + rows[3])
                for rows in csv.reader(inp)
            }

        # Count once: one query instead of several per batch, and every batch
        # is bounded by the same total.
        total = BooksUrl.objects.all().count()

        # NOTE(review): self.ids is read below but not reset here; presumably
        # printBooks fills it. Verify it cannot carry results over from a
        # previous request.
        threads = []
        for first in range(1, total + 1, 50):
            last = min(first + 49, total)
            worker = threading.Thread(target=self.printBooks,
                                      args=(
                                          first,
                                          last,
                                          regex,
                                          dict_from_csv,
                                      ))
            threads.append(worker)
            worker.start()

        for worker in threads:
            worker.join()

        res = self.ids
        res.sort()
        res_closeness = closeness.closenessCentrality(res)

        resultats = []
        for id_book in res_closeness:
            book = BooksUrl.objects.get(bookID=int(id_book))
            resultats.append(BooksUrlSerializer(book).data)
        suggestions = suggestion.getSuggestions(resultats)
        fin = time.time()
        print("temps: " + str(fin - debut), "nb res: " + str(len(resultats)))
        return JsonResponse(
            {
                "resultats": resultats,
                "suggestions": suggestions
            }, safe=False)
Ejemplo n.º 7
0
 def get(self, request, pk, format=None):
     """Return the book identified by ``pk`` as a JSON object.

     The lookup is delegated to ``self.get_object``.
     """
     payload = BooksUrlSerializer(self.get_object(pk)).data
     return JsonResponse(payload)
Ejemplo n.º 8
0
 def handle(self, *args, **options):
     """Download each book's text and index its words in ``TermesUrl``.

     Processes the ``BooksUrl`` rows with ``464 <= bookID <= 1993``. For
     each one the file at the book's ``url`` is saved to
     ``<TEMP_PATH>/text<bookID>.txt``, read line by line, split into words,
     cleaned with ``changeCharac`` and passed to ``_index_term``. Any
     failure while handling a book prints ``"ERROR"`` and moves on to the
     next book; the success line is written for every book regardless.
     """
     livres = BooksUrl.objects.filter(bookID__gte=464, bookID__lte=1993)
     for livre in livres:
         serializer = BooksUrlSerializer(livre)
         url = serializer.data['url']
         book_id = serializer.data['bookID']
         print("---------------------------LIVRE", book_id,
               "-------------------------")
         try:
             download_path = os.path.join(TEMP_PATH,
                                          'text' + str(book_id) + ".txt")
             urllib.request.urlretrieve(url, download_path)
             # NOTE(review): the file is decoded with the platform default
             # encoding; confirm that matches the downloaded texts.
             with open(download_path) as f:
                 lines = f.readlines()
             for line in lines:
                 mots = []
                 for fragment in line.split():
                     # changeCharac may introduce whitespace, so split again.
                     mots.extend(changeCharac(fragment).split())
                 for mot in mots:
                     self._index_term(mot, book_id)
         except Exception:
             # Best-effort per book. Unlike a bare ``except:``, this lets
             # KeyboardInterrupt/SystemExit stop the command.
             print("ERROR")
         self.stdout.write(
             self.style.SUCCESS('[' + time.ctime() +
                                '] Livre avec comme url ="%s"' %
                                str(serializer.data['bookID'])))

 def _index_term(self, mot, book_id):
     """Record in ``TermesUrl`` that word ``mot`` occurs in ``book_id``.

     Words flagged by ``preg_macth`` (tested before lower-casing) and words
     of one character or less are ignored. If no row exists for the
     lower-cased word, one is created with ``ids`` set to the book id;
     otherwise the id is appended to the ``";"``-separated ``ids`` of every
     matching row that does not already list it.
     """
     is_blacklisted = preg_macth(mot)
     mot = mot.lower()
     if is_blacklisted or len(mot) <= 1:
         return

     # One query serves both the existence check and the update loop.
     termes = list(TermesUrl.objects.filter(terme=mot))
     if not termes:
         new_serializer = TermesUrlSerializer(
             data={
                 'terme': mot,
                 'ids': str(book_id)
             })
         if not new_serializer.is_valid():
             print("ERROR SERIALIZER")
             return
         try:
             new_serializer.save()
         except Exception:
             print("ERROR SAVE SERIALIZER")
         return

     book_key = str(book_id)
     for terme in termes:
         ids = TermesUrlSerializer(terme).data['ids'].split(";")
         if book_key not in ids:
             ids.append(book_key)
             TermesUrl.objects.filter(pk=terme.pk).update(
                 ids=";".join(ids))