Beispiel #1
0
def save_initial_images(liked_images_dir, disliked_images_dir, database,
                        database_txt, all_liked_artworks_dir):
    '''Seeds the database with an initial set of liked and disliked images.

    The database is cleared first. Every file listed in liked_images_dir is
    added as an Artwork with like = 1, and every file listed in
    disliked_images_dir with like = 0. Each liked image is additionally copied
    into all_liked_artworks_dir, named after its new database ID (no file
    extension is appended). Finally the full database is exported to
    database_txt.

    Paths are built with os.path.join, so the directory arguments work with
    or without a trailing path separator.
    '''
    print('Adding initial artworks to database...')
    database.clear_all()

    for folder in (liked_images_dir, disliked_images_dir):
        is_liked = folder == liked_images_dir

        for artwork in os.listdir(folder):
            source_path = os.path.join(folder, artwork)

            # Create the new artwork
            new_work = Artwork()
            new_work.ID = database.number_of_artworks + 1
            new_work.like = 1 if is_liked else 0
            # The file name is stored both as the artist and as the original
            # name of the image.
            new_work.artist = artwork
            new_work.added = datetime.datetime.now().strftime(
                "%m/%d/%Y %H:%M:%S")
            new_work.original_name = artwork
            new_work.vector = convert(source_path)

            # Add the new artwork to the database
            database.add_artwork(new_work)

            # If it's a liked artwork, copy it to the permanent folder for
            # liked artworks, renamed to its ID
            if is_liked:
                shutil.copy(
                    source_path,
                    os.path.join(all_liked_artworks_dir, str(new_work.ID)))

    print('Added initial artworks to database')

    # Export the full database
    database.export_full(database_txt)
Beispiel #2
0
def predict_and_move(scraped_art_dir, scraped_info_txt, models_txt, database,
                     database_txt, predicted_disliked_txt):
    '''For each freshly scraped artwork, predicts if the viewer will like it.

    Every file in scraped_art_dir is scored with mt.predict_if_liked using the
    model returned by mt.get_theta(models_txt). The rows of scraped_info_txt
    (a comma-delimited file) are then matched to those files by image name:

    * predicted value 1: the row becomes an Artwork with like = 2 (not yet
      rated by the viewer) and is added to the database.
    * predicted value 0: the artist, info, title, link and a timestamp are
      appended as one '~'-separated line to predicted_disliked_txt and the
      image file is deleted from scraped_art_dir.

    Rows whose image name has no prediction are ignored, as are header rows
    (first field 'artist'), rows with an empty first field, and blank lines.
    The newly created Artworks are finally exported via
    database.export_new(database_txt, ...).
    '''
    print('Predicting which artworks will be liked...')

    # Get the predicted liked-values for each artwork and store them with
    # their file names
    name_and_liked = {}
    scraped_files = os.listdir(scraped_art_dir)
    if scraped_files:
        # Load the model once rather than once per image.
        # NOTE(review): assumes mt.get_theta only reads models_txt and that
        # the file does not change during this loop - confirm.
        theta = mt.get_theta(models_txt)
        for artwork in scraped_files:
            name_and_liked[artwork] = mt.predict_if_liked(
                os.path.join(scraped_art_dir, artwork), theta)

    # Go through the info for each freshly scraped artwork in scraped_info.txt
    new_artworks = []
    # newline='' is what the csv module expects for files it reads; the
    # with-block guarantees the handle is closed even if a row fails.
    with open(scraped_info_txt, 'r', newline='') as scraped_txt:
        for work in csv.reader(scraped_txt, delimiter=','):
            # csv.reader yields an empty list for a blank line; skip those
            # together with header rows and rows lacking an artist.
            if not work or work[0] == 'artist' or work[0] == '':
                continue

            # Get the original name of the image: the text between 'full/'
            # and "', 'checksum'" in the fifth field of the row
            image_field = work[4]
            start = image_field.find('full/') + 5
            end = image_field.find("', 'checksum'")
            original_name = image_field[start:end]

            prediction = name_and_liked.get(original_name)
            image_path = os.path.join(scraped_art_dir, original_name)
            timestamp = datetime.datetime.now().strftime("%m/%d/%Y %H:%M:%S")

            # Only convert the work into an Artwork if the viewer is expected
            # to like it
            if prediction == 1:
                this_work = Artwork()
                this_work.ID = database.number_of_artworks + 1

                # Set the like for this work to 2, indicating that the viewer
                # has not yet rated it
                this_work.like = 2

                this_work.artist = work[0]
                this_work.info = work[1]
                this_work.nga_id = int(work[2])
                this_work.nga_page = int(work[5])
                this_work.title = work[6]
                this_work.nga_link = work[7]
                this_work.added = timestamp
                this_work.original_name = original_name
                this_work.vector = convert(image_path)

                new_artworks.append(this_work)

                # Add the work to the Database
                database.add_artwork(this_work)

            # If the viewer is expected to dislike the artwork, save some info
            # about it to predicted_disliked.txt and delete the image file
            elif prediction == 0:
                # Save the artist, info, title, viewing link, and time added
                disliked = '~'.join(
                    [work[0], work[1], work[6], work[7], timestamp])

                with open(predicted_disliked_txt, 'a') as disliked_file:
                    disliked_file.write(disliked + '\n')

                # Delete the .jpg/.png file
                os.remove(image_path)

    # Export the new (predicted liked) Artworks to the database
    database.export_new(database_txt, new_artworks)

    print('Predicted that %s out of %s artworks will be liked' %
          (len(new_artworks), len(name_and_liked)))