Esempio n. 1
0
def create_tags_csv(location: str = ""):
    """
    Extract tags from the file names in the current working directory and
    write them to a csv file.

    :param location: optional content of the directory column

    The resulting csv can be modified for use with :func:`write_exif_using_csv`
    or :func:`placeinfo.write_infos`. If you want to modify it with EXCEL or
    Calc take care to import all columns of the csv as text.
    """
    cwd = os.getcwd()
    collected_tags = OrderedSet()
    csv_rows = OrderedSet()
    csv_path = get_info_dir("tags_places.csv")
    csv_file, csv_writer = fileop.create_csv_writer(
        csv_path, ["directory", "name_part"])
    accessors = [
        FilenameAccessor(plain_name)
        for plain_name in get_plain_filenames_of_type(image_types, cwd)
    ]
    for accessor in accessors:
        for tag in accessor.tags():
            collected_tags.add(tag)
    # human-readable summary of all tags, grouped under the location heading
    writeToFile(get_info_dir("tags.txt"),
                location + "\n\t" + "\n\t".join(collected_tags) + "\n")
    for tag in collected_tags:
        csv_rows.add((location, tag))
    csv_writer.writerows(csv_rows)
    csv_file.close()
def test_index():
    """index() must agree with list.index for every inserted value."""
    values = list(range(100))
    random.shuffle(values)
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet(values)
    assert len(oset) == len(values)
    for value in values:
        assert values.index(value) == oset.index(value)
def test_add():
    """Adding 100 distinct values yields a set of size 100 containing all of them."""
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet()
    for value in range(100):
        oset.add(value)
    assert len(oset) == 100
    for value in range(100):
        assert value in oset
Esempio n. 4
0
def create_counters_csv_per_dir():
    """
    extract counter from the file name
    write a csv file with those counters for each directory

    This csv can be modified to be used with :func:`write_exif_using_csv`
    If you want to modify it with EXCEL or Calc take care to import all columns of the csv as text.
    """
    # bug fix: previously logged create_tags_csv_per_dir instead of this function
    log_function_call(create_counters_csv_per_dir.__name__)
    inpath = os.getcwd()
    tag_set_names = OrderedSet()
    out_filename = get_info_dir("tags_counters.csv")
    csvfile, writer = fileop.create_csv_writer(out_filename, [
        "directory", "name_main", "name_part", "first", "last", "tags3",
        "description"
    ])
    # only the immediate subdirectories of the starting directory are processed
    for (dirpath, dirnames, filenames) in os.walk(inpath):
        if not inpath == dirpath: continue
        for dirname in dirnames:
            filenameAccessors = [
                FilenameAccessor(filename)
                for filename in get_plain_filenames_of_type(
                    image_types, dirpath, dirname)
            ]
            if len(filenameAccessors) == 0: continue
            _add_counter_csv_entries(dirname, filenameAccessors, tag_set_names)
    writer.writerows(tag_set_names)
    csvfile.close()
def test_getitem():
    """Indexing must return values in insertion order."""
    values = list(range(100))
    random.shuffle(values)
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet(values)
    assert len(oset) == len(values)
    for position in range(len(oset)):
        assert oset[position] == values[position]
def test_contains():
    """Membership and count must hold for inserted values and fail outside."""
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet(range(100))
    assert len(oset) == 100
    for value in range(100):
        assert value in oset
        assert oset.count(value) == 1
    assert -1 not in oset
    assert 100 not in oset
Esempio n. 7
0
def tag_dict_to_options(data: dict) -> list:
    """
    Convert a tag dictionary to a list of exiftool-style command line options.

    :param data: mapping of tag name to a value or a list of values;
        falsy values (and falsy list entries) are skipped
    :return: list of strings of the form ``-key=value``
    """
    options = []
    # iterate items() instead of re-indexing data[key] on every access
    for key, value in data.items():
        if not value:
            continue
        # isinstance instead of type(...) == list (also accepts list subclasses)
        if isinstance(value, list):
            # dict.fromkeys deduplicates while preserving insertion order,
            # replacing the third-party OrderedSet dependency
            for entry in dict.fromkeys(value):
                if entry:
                    options.append("-%s=%s" % (key, entry))
        else:
            options.append("-%s=%s" % (key, value))
    return options
Esempio n. 8
0
def create_names_csv_per_dir(start_after_dir=''):
    """
    extract names from the file path
    write a csv file with those names for each directory

    This csv can be modified to be used with :func:`write_exif_using_csv`
    If you want to modify it with EXCEL or Calc take care to import all columns of the csv as text.
    """
    log_function_call(create_names_csv_per_dir.__name__)
    root = os.getcwd()
    rows = OrderedSet()
    csv_path = get_info_dir("tags_names.csv")
    csv_file, csv_writer = fileop.create_csv_writer(
        csv_path, ["directory", "name_main", "tags"])
    for (dirpath, dirnames, filenames) in os.walk(root):
        if is_invalid_path(dirpath):
            continue
        accessors = [
            FilenameAccessor(name)
            for name in filterFiles(filenames, image_types)
        ]
        if len(accessors) == 0:
            continue
        # collect the path components that come after start_after_dir as tags
        tags = []
        seen_marker = False
        for component in dirpath.split(os.sep):
            if seen_marker:
                tags += component.split(', ')
            else:
                seen_marker = component == start_after_dir
        # add one row per run of consecutive accessors sharing the same prefix
        previous_pre = object()  # sentinel: never equal to any real prefix
        for accessor in accessors:
            if not accessor.pre == previous_pre:
                rows.add((", ".join(tags), accessor.pre,
                          ', '.join(OrderedSet(tags + [accessor.pre]))))
            previous_pre = accessor.pre
    csv_writer.writerows(rows)
    csv_file.close()
Esempio n. 9
0
def create_tags_csv_per_dir():
    """
    extract tags from the file name
    write a csv file with those tags and group them by toplevel directory

    This csv can be modified to be used with :func:`write_exif_using_csv` or :func:`placeinfo.write_infos`
    If you want to modify it with EXCEL or Calc take care to import all columns of the csv as text.
    """
    log_function_call(create_tags_csv_per_dir.__name__)
    root = os.getcwd()
    rows = OrderedSet()
    csv_path = get_info_dir("tags_per_dir.csv")
    csv_file, csv_writer = fileop.create_csv_writer(
        csv_path, ["directory", "name_part"])
    for (dirpath, dirnames, filenames) in os.walk(root):
        # only immediate subdirectories of the starting directory are scanned
        if not root == dirpath:
            continue
        for subdir in dirnames:
            dir_tags = OrderedSet()
            accessors = [
                FilenameAccessor(name)
                for name in get_plain_filenames_of_type(
                    image_types, dirpath, subdir)
            ]
            if len(accessors) == 0:
                continue
            for accessor in accessors:
                for tag in accessor.tags():
                    dir_tags.add(tag)
            # human-readable summary of the tags found in this subdirectory
            writeToFile(get_info_dir("tags.txt"),
                        subdir + "\n\t" + "\n\t".join(dir_tags) + "\n")

            # drop purely numeric parts of the directory name for the csv
            cleaned_name = "_".join(part for part in subdir.split("_")
                                    if not part.isnumeric())
            for tag in dir_tags:
                rows.add((cleaned_name, tag))
    csv_writer.writerows(rows)
    csv_file.close()
Esempio n. 10
0
 def __init__(self,
              distance_measure:
              MonophonicRhythmDistanceMeasure = HammingDistanceMeasure,
              rhythm_player: tp.Union[BSRhythmPlayer,
                                      tp.Type[BSRhythmPlayer],
                                      None] = None):
     """
     Initialize the controller state.

     :param distance_measure: distance measure (class or instance) handed
         to :meth:`set_distance_measure`; defaults to HammingDistanceMeasure
     :param rhythm_player: player instance, player class, or None; handed
         to :meth:`set_rhythm_player`
     """
     self._config = BSConfig()
     self._corpus = None  # type: MidiRhythmCorpus
     self._corpus_resolution = -1
     self._distances_to_target = np.empty(0)
     self._distances_to_target_rhythm_are_stale = False
     self._rhythm_measure = SummedMonophonicRhythmDistance(
     )  # type: SummedMonophonicRhythmDistance
     self._rhythm_selection = OrderedSet()
     self._target_rhythm = None  # type: tp.Union[MidiRhythm, None]
     self._target_rhythm_prev_update = None
     self._lock = threading.Lock()
     self._rhythm_player = None
     # one ordered observer set per controller event, keyed by action constant
     self._callbacks = dict((action, OrderedSet()) for action in [
         BSController.RHYTHM_SELECTION, BSController.CORPUS_LOADED,
         BSController.DISTANCES_TO_TARGET_UPDATED, BSController.
         RHYTHM_PLAYBACK_START, BSController.RHYTHM_PLAYBACK_STOP,
         BSController.TARGET_RHYTHM_SET, BSController.DISTANCE_MEASURE_SET,
         BSController.RHYTHM_LOADER_REGISTERED
     ])
     self._rhythm_loaders = OrderedDict(
     )  # rhythm loaders by loader source name
     self.set_rhythm_player(rhythm_player)
     self.set_distance_measure(distance_measure)
     # automatically register a loader for the currently selected rhythm
     self.register_rhythm_loader(BSSelectedMidiRhythmLoader(self))
     # setup config change handlers
     self._setup_config()
     # if a midi root directory is set, load the corpus
     if self.get_config().midi_root_directory.get():
         self.load_corpus()
Esempio n. 11
0
 def set_description_from_relevant(self):
     """
     Build each claim's description from its keywords plus the content of
     its relevant posts, then store the updated claims via the database.

     The judgment file maps a topic id (the integer claim id) to post ids;
     the first ``self._num_of_relevant_tweets`` of them are looked up in
     the posts table and their cleaned content is appended to the claim's
     lower-cased keywords to form the description.
     """
     claims = self._db.get_claims()
     topic_judgments = self._read_judgments(self._judgment_path)
     posts = self._db.get_posts()
     # index posts by id for O(1) lookup of the judged post ids
     post_dict = {p.post_id: p for p in posts}
     for claim in claims:
         topic_id = int(claim.claim_id)
         # tweets = self._twitter_api.get_tweets_by_ids(topic_judgments[topic_id][:self._num_of_relevant_tweets])
         # posts, authors = self._db.convert_tweets_to_posts_and_authors(tweets, self._domain)
         # NOTE(review): ids missing from post_dict become None entries here,
         # and post.content below would raise on them — presumably judgments
         # only reference known posts; confirm against the judgment source.
         posts = list(
             map(post_dict.get,
                 topic_judgments[topic_id][:self._num_of_relevant_tweets]))
         # OrderedSet keeps first-seen order while deduplicating words
         claim_content = OrderedSet(claim.keywords.lower().split())
         for post in posts:
             list(map(claim_content.add, clean_tweet(post.content).split()))
             # if len(claim_content) > 25:
             #     break
         claim.description = clean_claim_description(
             ' '.join(claim_content), True)
     self._db.addPosts(claims)
Esempio n. 12
0
    def to_tag_dict(self) -> dict:
        """
        Collect the metadata of this file into a single tag dictionary.

        Builds a default title from location and tags if none is set, joins
        the collected descriptions, and assembles label, title, keywords,
        artist, GPS and date information into one dict.

        :return: mapping of tag names to values
        """
        if not self.title:
            self.title = ", ".join(
                OrderedSet(self.location.get_minor() + self.tags))

        self._write_description_tags()
        if len(self.description_tree.keys()) > 0:
            self._write_description_tree()
        full_description = (FileMetaData.linesep + "\n\n").join(
            self.descriptions)

        tagDict = {
            'Label': self.filenameAccessor.name,
            'Title': self.title,
            # bug fix: 'Keywords' previously aliased self.tags directly, so the
            # .extend() call below mutated self.tags itself; copy both entries
            'Keywords': list(self.tags),
            'Subject': list(self.tags),
            'Description': full_description,
            'UserComment': full_description,
            'Identifier': self.id,
            'Rating': self.rating
        }

        if settings.photographer:
            tagDict['Artist'] = settings.photographer

        if len(self.gps) == 2:
            # NOTE(review): Ref and coordinate tags receive the same value —
            # presumably self.gps entries embed the hemisphere; confirm.
            tagDict["GPSLatitudeRef"] = self.gps[0]
            tagDict["GPSLatitude"] = self.gps[0]
            tagDict["GPSLongitudeRef"] = self.gps[1]
            tagDict["GPSLongitude"] = self.gps[1]

        if self.dateTimeOriginal and FileMetaData.dateTimeOriginal_regex.match(
                self.dateTimeOriginal):
            tagDict["DateTimeOriginal"] = self.dateTimeOriginal

        add_dict(tagDict, self.location.to_tag_dict())
        tagDict['Keywords'].extend(self.tags2 + self.tags3)
        tagDict['Subject'].extend(self.tags2 + self.tags3)
        return tagDict
def test_discard():
    """discard() removes present values and ignores missing ones."""
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet(range(100))
    for value in range(200):
        oset.discard(value)
    assert len(oset) == 0
def test_repr():
    """An empty set has the canonical repr."""
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet()
    assert repr(oset) == 'OrderedSet([])'
def test_init():
    """A freshly constructed set is empty."""
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet()
    assert len(oset) == 0
Esempio n. 16
0
    def _greedy_search(self, generate_next_keywords, search_type):
        """
        Greedily build keyword sets for every claim, one word per iteration,
        evaluating each candidate and persisting the results to the database.

        :param generate_next_keywords: callable(base_keywords, word) returning
            the candidate keyword collection for the next step
        :param search_type: label for this strategy; 'top_down' starts from
            all claim-description words, anything else from an empty set
        """
        claims = self._db.get_claims()
        min_tweets = self._min_tweet_count
        # word_tf_idf_dict = self._keywords_generator.get_word_tf_idf_of_claims()
        for i, claim in enumerate(claims):
            keywords_tweet_num = defaultdict(int)
            # allows resuming an interrupted run at a given claim index
            if i < self._start_from_claim:
                continue
            walked_keywords = Counter()
            print('{} Claim {}/{}'.format(search_type, i, len(claims)))
            start = timeit.default_timer()
            claim_description_words = self.get_claim_words_from_description(
                claim)
            ordered_words = OrderedSet(claim_description_words)
            base_keywords = ordered_words if search_type == 'top_down' else set(
            )
            # num_of_potential_words = len(ordered_words)
            num_of_potential_words = self._max_keywords_size
            keywords_list = []
            # grow the keyword set one word per outer iteration
            for size in range(1, num_of_potential_words + 1):
                same_keywords_size = Counter()
                # [word, distance]; 1000 acts as a "worse than anything" sentinel
                best_word_rank_tuple = ['', 1000]
                for iter, word in enumerate(ordered_words):
                    keywords_str = ' '.join(
                        generate_next_keywords(base_keywords, word))
                    keywords_size = len(keywords_str.split(' '))
                    type_name = '{}_iter_{}_keywords_size_{}'.format(
                        search_type, iter, keywords_size)
                    evaluation = self.eval_keywords_for_claim(
                        claim, keywords_str, type_name)
                    # keep the best-scoring word that also meets the tweet count
                    if best_word_rank_tuple[1] > evaluation[
                            'distance'] and evaluation[
                                'tweet_num'] > min_tweets:
                        best_word_rank_tuple = [word, evaluation['distance']]
                    print('\r{} Distance: {}'.format(type_name,
                                                     evaluation['distance']),
                          end='')
                    keywords_tweet_num[keywords_str] = evaluation['tweet_num']
                    # store negated distances so Counter.most_common yields
                    # the smallest distances first
                    if evaluation['tweet_num'] > min_tweets:
                        walked_keywords[
                            keywords_str] = -1.0 * evaluation['distance']
                        same_keywords_size[
                            keywords_str] = -1.0 * evaluation['distance']
                    keywords_list.append(
                        [keywords_str, evaluation['distance'], type_name])
                # consume the chosen word; if none qualified, pop() removes an
                # arbitrary (presumably last) word — confirm OrderedSet semantics
                if ordered_words:
                    if best_word_rank_tuple[0] == '':
                        best_word_rank_tuple[0] = ordered_words.pop()
                    else:
                        ordered_words.discard(best_word_rank_tuple[0])
                base_keywords = generate_next_keywords(base_keywords,
                                                       best_word_rank_tuple[0])

                curr_distance = best_word_rank_tuple[1]
                if len(same_keywords_size) > 0:
                    keywords, best_distances = same_keywords_size.most_common(
                        1)[0]
                    self._add_new_keywords(
                        claim, keywords,
                        '{}_keywords_size_{}'.format(search_type, size),
                        -1.0 * best_distances, keywords_tweet_num[keywords])
                else:
                    self._add_new_keywords(
                        claim, ' '.join(base_keywords),
                        '{}_keywords_size_{}'.format(search_type,
                                                     size), curr_distance,
                        keywords_tweet_num[' '.join(base_keywords)])

            # record every evaluated candidate, not just the per-size winners
            for keywords, keywords_distance, type_name in keywords_list:
                self._add_new_keywords(claim, keywords, type_name,
                                       keywords_distance,
                                       keywords_tweet_num[keywords])

            # pick the final keyword sets: prefer candidates that met the
            # tweet-count threshold, otherwise fall back to the raw list
            if len(walked_keywords) > 0:
                keywords, best_distances = list(
                    zip(*(walked_keywords.most_common(
                        self._output_keywords_count))))
            else:
                sorted_by_second = sorted(keywords_list,
                                          key=lambda tup: tup[1],
                                          reverse=True)
                keywords, best_distances, type_name = list(
                    zip(*sorted_by_second[:self._output_keywords_count]))

            self._add_new_keywords(
                claim, '||'.join(keywords), '{}_final'.format(search_type),
                -1.0 * np.mean(best_distances),
                sum(list(keywords_tweet_num[k] for k in keywords)))
            # flush the accumulated keyword connections for this claim
            with self._db.session.no_autoflush:
                self._db.addPosts(self._keywords_connections)
            self._keywords_connections = []
            end = timeit.default_timer()
            print('run time: {}'.format((end - start)))
def test_index_error():
    """index() of a value not in the set must raise ValueError."""
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet(range(10))
    with pytest.raises(ValueError):
        oset.index(10)
Esempio n. 18
0
def find_bad_exif(do_move=True,
                  check_date_additional=False,
                  folder: str = r""):
    """
    find files with missing exif data

    Writes csv reports (no_tags.csv, date_missing.csv and, if requested,
    bad_date_additional.csv) listing the affected files, and optionally
    moves them into "bad_exif_*" subdirectories of the working directory.

    :param do_move: move affected files into bad_exif_* directories
    :param check_date_additional: also report files carrying any of the
        additional date tags (Date Created, Create Date, Modify Date, ...)
    :param folder: passed as ``regex`` to is_invalid_path — presumably
        restricts which directories are scanned; confirm its semantics
    """
    log_function_call(find_bad_exif.__name__, do_move)

    clock = Clock()
    inpath = os.getcwd()
    lines_no_tags = OrderedSet()
    lines_bad_date_additional = OrderedSet()
    lines_date_missing = OrderedSet()
    out_filename_no_tags = get_info_dir("no_tags.csv")
    file_no_tags, writer_no_tags = fileop.create_csv_writer(
        out_filename_no_tags, ["directory", "name_part"])
    out_filename_bad_date_additional = get_info_dir("bad_date_additional.csv")
    file_bad_date_additional, writer_bad_date_additional = fileop.create_csv_writer(
        out_filename_bad_date_additional, ["directory", "name_part"])
    out_filename_date_missing = get_info_dir("date_missing.csv")
    file_date_missing, writer_date_missing = fileop.create_csv_writer(
        out_filename_date_missing, ["directory", "name_part"])
    for (dirpath, dirnames, filenames) in os.walk(inpath):
        if is_invalid_path(dirpath, regex=folder): continue
        if fileop.count_files(filenames, settings.image_types) == 0: continue
        # Tagdict maps exif tag names to per-file value lists (parallel arrays)
        Tagdict = read_exiftags(dirpath, settings.image_types, ask=False)
        if len(list(Tagdict.values())) == 0: continue
        leng = len(list(Tagdict.values())[0])
        for i in range(leng):
            # a file counts as untagged if any of these tag lists is missing or empty
            if (not "Keywords" in Tagdict or not Tagdict["Keywords"][i]) or \
                    (not "Subject" in Tagdict or not Tagdict["Subject"][i]) or \
                    (not "Description" in Tagdict or not Tagdict["Description"][i]) or \
                    (not "User Comment" in Tagdict or not Tagdict["User Comment"][i]):
                lines_no_tags.add((os.path.basename(dirpath),
                                   _remove_counter(Tagdict["File Name"][i])))
                # skip files already sitting in a bad_exif directory
                if do_move and not "bad_exif" in dirpath:
                    move(
                        Tagdict["File Name"][i], dirpath,
                        dirpath.replace(
                            inpath, os.path.join(inpath, "bad_exif_keywords")))
            if not "Date/Time Original" in Tagdict or not Tagdict[
                    "Date/Time Original"][i]:
                lines_date_missing.add(
                    (os.path.basename(dirpath),
                     _remove_counter(Tagdict["File Name"][i])))
                if do_move and not "bad_exif" in dirpath:
                    move(
                        Tagdict["File Name"][i], dirpath,
                        dirpath.replace(
                            inpath,
                            os.path.join(inpath, "bad_exif_date_missing")))
            # optionally flag files that carry any additional/secondary date tag
            if check_date_additional and \
                    (("Date Created" in Tagdict and Tagdict["Date Created"][i]) or
                     ("Time Created" in Tagdict and Tagdict["Time Created"][i]) or
                     ("Create Date" in Tagdict and Tagdict["Create Date"][i]) or
                     ("Modify Date" in Tagdict and Tagdict["Modify Date"][i]) or
                     ("Digital Creation Date" in Tagdict and Tagdict["Digital Creation Date"][i])):
                lines_bad_date_additional.add(
                    (os.path.basename(dirpath),
                     _remove_counter(Tagdict["File Name"][i])))
                if do_move and not "bad_exif" in dirpath:
                    move(
                        Tagdict["File Name"][i], dirpath,
                        dirpath.replace(
                            inpath,
                            os.path.join(inpath, "bad_exif_date_additional")))
    writer_no_tags.writerows(lines_no_tags)
    writer_bad_date_additional.writerows(lines_bad_date_additional)
    writer_date_missing.writerows(lines_date_missing)
    file_no_tags.close()
    file_bad_date_additional.close()
    file_date_missing.close()
    clock.finish()
def test_reversed():
    """reversed() must iterate in exact reverse insertion order."""
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet(range(100))
    assert list(reversed(oset)) == list(reversed(range(100)))
    names = ['eve', 'carol', 'alice', 'dave', 'bob']
    oset = OrderedSet(names)
    assert list(reversed(oset)) == list(reversed(names))
def test_iter():
    """Iteration must preserve insertion order."""
    # renamed local from `os` to avoid shadowing the os module used in this file
    oset = OrderedSet(range(100))
    assert list(oset) == list(range(100))
    names = ['eve', 'carol', 'alice', 'dave', 'bob']
    oset = OrderedSet(names)
    assert list(oset) == names
Esempio n. 21
0
def test_index_error():
    """index() of a value not in the set must raise ValueError.

    Bug fix: the uncaught ValueError previously made this test always fail;
    wrap the call in pytest.raises like the other test_index_error variant.
    """
    oset = OrderedSet(range(10))
    with pytest.raises(ValueError):
        oset.index(10)