Beispiel #1
0
    def test_medium(self):
        """Verify which medium an imported article gets for each option mix.

        A one-row CSV whose 'med' column holds 'Bla' is passed through
        _run_test_csv with different combinations of the ``medium``,
        ``medium_name`` and ``medium_existing`` options.  Every run must
        produce exactly one article carrying the expected medium name.
        """
        import functools

        columns = ('kop', 'datum', 'tekst', 'med')
        rows = [('kop1', '2001-01-01', '', 'Bla')]
        run_csv = functools.partial(
            _run_test_csv, columns, rows,
            text="tekst", headline="kop", date="datum")

        def assert_single_article_medium(expected_name, **options):
            # Exactly one article must come back, with the expected medium.
            imported = run_csv(**options)
            self.assertEqual(len(imported), 1)
            self.assertEqual(imported[0].medium.name, expected_name)

        # Only the 'med' column is mapped: its value becomes the medium.
        assert_single_article_medium("Bla", medium_name=None, medium="med")

        # Only an existing medium id is given: that medium is used.
        assert_single_article_medium(
            "1", medium_existing=Medium.get_or_create("1").id)

        # Column and existing medium together: the column value is expected.
        assert_single_article_medium(
            "Bla", medium_existing=Medium.get_or_create("1").id, medium="med")

        # Column and explicit medium name together: the column value is expected.
        assert_single_article_medium("Bla", medium_name="bla2", medium="med")

        # Explicit name and existing medium together: the existing one is expected.
        assert_single_article_medium(
            "2", medium_name="bla2",
            medium_existing=Medium.get_or_create("2").id)
Beispiel #2
0
    def test_medium(self):
        """Exercise the medium-related options of the CSV import.

        The fixture is a single CSV row with 'Bla' in its 'med' column.  For
        each combination of ``medium``, ``medium_name`` and
        ``medium_existing`` the import must return one article, and the name
        of that article's medium is compared against the expected value.
        """
        csv_header = ('kop', 'datum', 'tekst', 'med')
        csv_rows = [('kop1', '2001-01-01', '', 'Bla')]

        def import_rows(**medium_options):
            # The field mapping is fixed; only the medium options vary per call.
            return _run_test_csv(csv_header, csv_rows, text="tekst",
                                 headline="kop", date="datum",
                                 **medium_options)

        def check(result, medium_name):
            self.assertEqual(len(result), 1)
            self.assertEqual(result[0].medium.name, medium_name)

        # Medium taken from the mapped column.
        check(import_rows(medium_name=None, medium="med"), "Bla")

        # Medium taken from an existing medium id.
        existing_id = Medium.get_or_create("1").id
        check(import_rows(medium_existing=existing_id), "1")

        # Mapped column combined with an existing medium id.
        existing_id = Medium.get_or_create("1").id
        check(import_rows(medium_existing=existing_id, medium="med"), "Bla")

        # Mapped column combined with an explicit medium name.
        check(import_rows(medium_name="bla2", medium="med"), "Bla")

        # Explicit medium name combined with an existing medium id.
        existing_id = Medium.get_or_create("2").id
        check(import_rows(medium_name="bla2", medium_existing=existing_id), "2")
Beispiel #3
0
 def to_internal_value(self, data):
     """Resolve *data* to a medium, accepting either an integer or a name.

     Values that ``int()`` accepts are handed to the parent field's
     ``to_internal_value``.  Values for which ``int()`` raises ValueError
     are treated as a medium name and resolved via ``Medium.get_or_create``.
     """
     try:
         int(data)
     except ValueError:
         is_integer = False
     else:
         is_integer = True

     if is_integer:
         return super(MediumField, self).to_internal_value(data)
     return Medium.get_or_create(data)
Beispiel #4
0
 def _scrape_unit(self, row):
     """Yield one Article built from a single input row.

     Every value in *row* is decoded from UTF-8.  The search query
     ('zoekopdracht') is recorded in ``self.queries``; the known columns are
     mapped onto Article fields, and whatever columns remain are serialised
     to JSON and stored as the article's metastring.
     """
     fields = {}
     for column, raw_value in row.iteritems():
         fields[column] = raw_value.decode("utf-8")

     search_query = fields.pop('zoekopdracht')
     self.queries.add(search_query)

     source = Medium.get_or_create(fields.pop('type bron'))
     raw_date = fields.pop('datum')
     published = datetime.datetime.strptime(raw_date, "%Y-%m-%d %H:%M")
     title = fields.pop('titel')
     # An empty 'bereik' value is stored as None rather than ''.
     page = fields.pop('bereik') or None
     body = fields.pop('bericht tekst')
     link = fields.pop('url')
     writer = fields.pop('auteur')
     # Only the columns not consumed above are left in `fields` here.
     leftover_json = json.dumps(fields)

     yield Article(
         headline=title,
         pagenr=page,
         text=body,
         date=published,
         medium=source,
         url=link,
         author=writer,
         metastring=leftover_json,
     )
Beispiel #5
0
    def restore_fields(self, data, files):
        """Normalise incoming article data, then defer to the parent class.

        Three adjustments are made on a copy of *data*:

        * a 'medium' value that ``int()`` rejects with ValueError is treated
          as a medium name and replaced by the id of the matching Medium
          (looked up or created once per name and cached on ``self.media``);
        * a 'date' value of exactly 10 characters gets "T00:00" appended;
        * a missing 'project' is filled in from the view in the serializer
          context.
        """
        payload = data.copy()  # make data mutable

        if 'medium' in payload:
            medium_value = payload['medium']
            try:
                int(medium_value)
            except ValueError:
                # The name -> id cache is created on first use.
                if not hasattr(self, 'media'):
                    self.media = {}
                cache = self.media
                if medium_value not in cache:
                    cache[medium_value] = Medium.get_or_create(medium_value).id
                payload['medium'] = cache[medium_value]

        # A 10-character date is presumably a bare YYYY-MM-DD; add a time part.
        lacks_time_part = 'date' in payload and len(payload['date']) == 10
        if lacks_time_part:
            payload['date'] += "T00:00"

        if 'project' not in payload:
            current_view = self.context['view']
            payload['project'] = current_view.project.id

        return super(ArticleSerializer, self).restore_fields(payload, files)
Beispiel #6
0
    def restore_fields(self, data, files):
        """Prepare article data for the parent ``restore_fields``.

        Works on a copy of *data* and:

        * replaces a 'medium' value that ``int()`` rejects with ValueError by
          the id of the Medium with that name, recording the mapping in both
          ``self.medium_ids`` (name -> id) and ``self.medium_names``
          (id -> name) the first time a name is seen;
        * appends "T00:00" to a 'date' value that is exactly 10 characters;
        * fills in 'project' from the view in the serializer context when it
          is absent.
        """
        payload = data.copy()  # make data mutable

        if 'medium' in payload:
            name = payload['medium']
            try:
                int(name)
            except ValueError:
                # Unknown name: resolve it once and remember both directions.
                if name not in self.medium_ids:
                    resolved_id = Medium.get_or_create(name).id
                    self.medium_ids[name] = resolved_id
                    self.medium_names[resolved_id] = name
                payload['medium'] = self.medium_ids[name]

        # A 10-character date is presumably date-only; add a time part.
        date_only = 'date' in payload and len(payload['date']) == 10
        if date_only:
            payload['date'] += "T00:00"

        if 'project' not in payload:
            view = self.context['view']
            payload['project'] = view.project.id

        return super(ArticleSerializer, self).restore_fields(payload, files)
Beispiel #7
0
    def parse_document(self, paragraphs):
        """Build an Article from the paragraphs of one document.

        ``parse_page`` supplies a metadata mapping and the article text.  The
        'medium' entry of that mapping is replaced in place by the object
        returned from ``Medium.get_or_create`` before the mapping is expanded
        into Article keyword arguments.
        """
        meta, body = parse_page(paragraphs)
        medium_name = meta["medium"]
        meta["medium"] = Medium.get_or_create(medium_name)

        article = Article(text=body, **meta)
        return article
Beispiel #8
0
    def parse_document(self, paragraphs):
        """Turn the given paragraphs into an Article.

        The metadata and text come from ``parse_page``.  The metadata's
        'medium' value is swapped (in the same dict) for the result of
        ``Medium.get_or_create`` on it, and all metadata entries are then
        passed to Article as keyword arguments alongside the text.
        """
        parsed = parse_page(paragraphs)
        fields, content = parsed
        # Replace the 'medium' entry with what Medium.get_or_create returns.
        fields["medium"] = Medium.get_or_create(fields["medium"])

        return Article(text=content, **fields)