Ejemplo n.º 1
0
    def save(self, *args, **kwargs):
        """Persist this item as a new ``Document`` or merge it into its duplicate.

        When ``self.duplication`` is truthy the work is delegated to
        ``self.merge()`` and nothing else happens here. Otherwise a new
        ``Document`` is created with this item's description, and one
        ``*Order`` row is created per title, date, location, genre, artist and
        tag token, and per url / image url.

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        if self.duplication:
            self.merge()
            return

        document = Document()
        document.description = self.description
        # Saved up front; every row created below points at this document.
        document.save()

        # (tokenizer, raw value, link model) -- processed in this order.
        token_sources = (
            (getTokensFromText, self.title, TitleOrder),
            (getTokensFromText, self.date, DateOrder),
            (getTokensFromText, self.location, LocationOrder),
            (getTokensFromList, self.genres, GenresOrder),
            (getTokensFromList, self.artists, ArtistOrder),
            (getTokensFromList, self.tags, TagOrder),
        )
        for tokenize, raw_value, order_model in token_sources:
            for value in tokenize(raw_value):
                token = Token(value)
                token.save()
                order_model.objects.create(token=token, document=document)

        # (iterable of url strings, link model) -- processed in this order.
        url_sources = (
            (self.urls, UrlOrder),
            (self.imageUrls, ImageOrder),
        )
        for addresses, order_model in url_sources:
            for address in addresses:
                url = Url(address)
                url.save()
                order_model.objects.create(url=url, document=document)

        # Final save once all links exist. This used to be a bare
        # ``document.save`` attribute reference, which never ran.
        document.save()
Ejemplo n.º 2
0
    def merge(self):
        """Fold this item's data into the existing document ``self.duplication``.

        Does nothing when ``self.duplication`` is falsy. Otherwise:

        * the duplicate's description is filled in from this item only when
          the duplicate has none;
        * for every date, location, genre, artist and tag token, and every
          url / image url, a link row to the duplicate is created unless a
          row with the same token/url and document already exists.

        Title tokens are not merged by this method.
        """
        target = self.duplication
        if not target:
            return

        if not target.description:
            target.description = self.description
        target.save()

        # (tokenizer, raw value, link model) -- processed in this order.
        token_sources = (
            (getTokensFromText, self.date, DateOrder),
            (getTokensFromText, self.location, LocationOrder),
            (getTokensFromList, self.genres, GenresOrder),
            (getTokensFromList, self.artists, ArtistOrder),
            (getTokensFromList, self.tags, TagOrder),
        )
        for tokenize, raw_value, order_model in token_sources:
            for value in tokenize(raw_value):
                token = Token(value)
                token.save()
                # Only link when this token/document pair is not linked yet.
                if not order_model.objects.filter(token=token, document=target).exists():
                    order_model.objects.create(token=token, document=target)

        # (iterable of url strings, link model) -- processed in this order.
        url_sources = (
            (self.urls, UrlOrder),
            (self.imageUrls, ImageOrder),
        )
        for addresses, order_model in url_sources:
            for address in addresses:
                url = Url(address)
                url.save()
                # Only link when this url/document pair is not linked yet.
                if not order_model.objects.filter(url=url, document=target).exists():
                    order_model.objects.create(url=url, document=target)

        # Final save once all links exist. This used to be a bare
        # ``self.duplication.save`` attribute reference, which never ran.
        target.save()
Ejemplo n.º 3
0
def findDuplicateInResults(document, results):
    """Return the first entry of ``results`` considered a duplicate of ``document``.

    A result matches when the ``name`` of one of its urls appears in
    ``document.urls``, or when ``hasOverlap`` is truthy for date, location,
    genres and artists alike. Returns ``None`` when no result matches.
    """
    for candidate in results:
        # A shared url is treated as conclusive on its own.
        shares_url = any(
            link.name in document.urls for link in candidate.urls.all()
        )
        if shares_url:
            return candidate

        # Otherwise date, location, genres and artists must all overlap;
        # they are checked in turn and the first miss skips this candidate.
        if not hasOverlap(getTokensFromText(document.date), candidate.date.all()):
            continue
        if not hasOverlap(getTokensFromText(document.location), candidate.location.all()):
            continue
        if not hasOverlap(getTokensFromList(document.genres), candidate.genres.all()):
            continue
        if not hasOverlap(getTokensFromList(document.artists), candidate.artists.all()):
            continue
        return candidate

    return None