Ejemplo n.º 1
0
 def _get_and_link_resource(asset_path):
     """Store a downloaded asset on disk and link its file path to its source URL.

     The payload is read from ``response`` and the link is keyed by ``url``;
     neither is defined in this function, so both (like ``self``) are resolved
     from an outer scope. After the link is recorded, the whole link table is
     written out with ``dump_as_json``.

     Args:
         asset_path: Directory in which the asset file is created.

     Returns:
         A tuple holding an empty string and an empty list.
     """
     # The file gets a random numeric name rather than the asset's own name.
     file_name = str(randrange(1000000))
     target = join(asset_path, file_name)
     with open(target, "wb") as asset_file:
         payload = response.read()
         asset_file.write(payload)
     self.linker[url] = target
     dump_as_json(self.linker, self.linker_path)
     return "", []
Ejemplo n.º 2
0
 def handle_data(self, tag):
     """Append a chunk of text to the content collected for the current site.

     Text made up solely of whitespace and/or ``+`` characters is ignored.
     Anything else is stored under ``self.base_url`` in ``self.sites_content``,
     separated from previously collected text by a single space. When the
     reported size of the dictionary exceeds the threshold, it is written out
     with ``dump_as_json`` and replaced by an empty dictionary.

     Args:
         tag: Text chunk to record.
     """
     # Raw-string pattern: "\s" inside a plain string literal is an invalid
     # escape sequence and triggers a SyntaxWarning on recent Python versions.
     # Searching for one character outside the class is equivalent to
     # stripping the class and testing what remains.
     if not re.search(r"[^\s+]", tag):
         return

     site = self.base_url
     if site in self.sites_content:
         self.sites_content[site] += " " + tag
     else:
         self.sites_content[site] = tag

     # NOTE(review): sys.getsizeof is shallow, so this measures the dict's own
     # table and not the collected strings; the flush is therefore driven by
     # the number of sites, not by the amount of text. Confirm this is intended.
     if sys.getsizeof(self.sites_content) > 10e2:
         dump_as_json(self.sites_content, self.raw_data)
         self.sites_content = {}
Ejemplo n.º 3
0
 def set_freq(self):
     """Build, save and return the per-website word frequency table.

     The text of every site in ``self.words_by_site`` is split on single
     spaces. A word is counted only when it is longer than three characters
     and contains no character from ``string.punctuation``. The counts are
     stored in ``self.word_frequency`` and written out with ``dump_as_json``.

     Returns:
         ``self.word_frequency``, mapping each site to a ``defaultdict(int)``
         of word counts.
     """
     for site, tags in self.words_by_site.items():
         self.word_frequency[site] = defaultdict(int)
         for word in tags.split(" "):
             # Too short to be counted.
             if len(word) <= 3:
                 continue
             # Reject any word that carries a punctuation character.
             if any(char in word for char in string.punctuation):
                 continue
             self.word_frequency[site][word] += 1
     dump_as_json(self.word_frequency, self.freqs_file_path)
     return self.word_frequency
Ejemplo n.º 4
0
    def persist_reports(self, new_reports):
        """
        Merge ``new_reports`` into the stored reports and save the result.

        The stored reports are loaded from ``data.json``; when loading raises
        ``ValueError`` an empty report list is used instead. The new entries
        are appended to the stored ones, the combined list is passed to
        ``self.validate`` and whatever that returns is written to ``data.json``.

        :param new_reports: mapping whose ``'reports'`` entry holds the reports to add
        :return: None
        """
        try:
            stored = load_as_json('data.json')
        except ValueError:
            # Start from an empty collection when the stored data cannot be loaded.
            stored = {'reports': []}

        merged = stored['reports']
        merged.extend(new_reports['reports'])
        dump_as_json(self.validate(merged), 'data.json', 'w')
Ejemplo n.º 5
0
    def persist_reports(self, new_reports):
        """
        Append ``new_reports`` to the reports kept in ``data.json``.

        Loading falls back to an empty report list when ``load_as_json``
        raises ``ValueError``. After the new entries are added, the full list
        goes through ``self.validate`` and the value it returns is written
        back to ``data.json``.

        :param new_reports: mapping with a ``'reports'`` entry listing the reports to add
        :return: None
        """
        try:
            current = load_as_json('data.json')
        except ValueError:
            # Nothing usable could be loaded, so begin with no reports.
            current = {'reports': []}

        all_reports = current['reports']
        all_reports.extend(new_reports['reports'])
        checked = self.validate(all_reports)
        dump_as_json(checked, 'data.json', 'w')
Ejemplo n.º 6
0
 def persist_table(self):
     """Write ``self.table`` to ``league_table.json`` using ``dump_as_json``."""
     # 'w' is forwarded unchanged; presumably the file mode (verify in dump_as_json).
     target, mode = 'league_table.json', 'w'
     dump_as_json(self.table, target, mode)
 def persist_table(self):
     """Save the current table to ``league_table.json`` through ``dump_as_json``."""
     # The third argument is passed through as-is; presumably a file mode (verify).
     destination = 'league_table.json'
     dump_as_json(self.table, destination, 'w')