Beispiel #1
0
 def test_dateoutput(self):
     """Compare toolkit's date output helpers against fixed expected values."""
     # Each case is laid out as:
     #   (input datetime, writeDate result, writeDateTime result,
     #    getYW result, getYM result, getYQ result)
     cases = (
         (
             datetime.datetime(1990, 1, 10, 13, 1, 0),
             "1990-01-10",
             "1990-01-10 13:01:00",
             1990.02,
             1990.01,
             1990.1,
         ),
     )
     for case in cases:
         moment, iso_date, iso_datetime, year_week, year_month, year_quarter = case

         written_date = toolkit.writeDate(moment)
         self.assertEqual(written_date, iso_date)

         written_datetime = toolkit.writeDateTime(moment)
         self.assertEqual(written_datetime, iso_datetime)

         self.assertEqual(toolkit.getYW(moment), year_week)
         self.assertEqual(toolkit.getYM(moment), year_month)
         self.assertEqual(toolkit.getYQ(moment), year_quarter)
Beispiel #2
0
def createArticle(db,
                  headline,
                  date,
                  source,
                  batchid,
                  text,
                  texttype=2,
                  length=None,
                  byline=None,
                  section=None,
                  pagenr=None,
                  fullmeta=None,
                  url=None,
                  externalid=None,
                  parentUrl=None,
                  retrieveArticle=1):
    """
    Write an article and its text to the database.

    One row is inserted into the 'articles' table and, using the id
    returned by that insert, one row is inserted into the 'texts' table.

    Arguments:
      db              -- object providing insert(table, values[, retrieveIdent])
      headline        -- article headline
      date            -- article date; converted with toolkit.writeDateTime
                         when toolkit.isDate(date) is true, else stored as given
      source          -- medium id as an int, or an object whose .id is used
      batchid         -- stored in the 'batchid' column
      text            -- article body; stripped of surrounding whitespace
      texttype        -- stored in the 'type' column of 'texts'
      length          -- stored length; when None and text is non-empty it
                         defaults to the number of whitespace-separated words
      byline, section -- optional metadata strings
      pagenr          -- page number; stripped when it is a string
      fullmeta        -- metadata string, or a dict (stored as its repr())
      url             -- truncated to 490 characters plus "..." when longer
      externalid      -- stored in the 'externalid' column
      parentUrl       -- accepted but not used by this function (see the
                         TODO and the comment on the insert below)
      retrieveArticle -- when true, return an article.Article for the new row

    Returns article.Article(db, aid) when retrieveArticle is true,
    otherwise None.
    """
    # TODO link to parent if parentUrl is not None

    if toolkit.isDate(date):
        date = toolkit.writeDateTime(date, 1)
    if not isinstance(source, int):
        source = source.id
    if isinstance(fullmeta, dict):
        # repr() replaces the deprecated backtick syntax (same result).
        fullmeta = repr(fullmeta)

    if url and len(url) > 490:
        url = url[:490] + "..."

    # NOTE(review): the second list presumably holds per-field maximum
    # lengths -- confirm against encodeAndLimitLength.
    (headline, byline, fullmeta, section), encoding = encodeAndLimitLength(
        [headline, byline, fullmeta, section], [740, 999999, 999999, 90])

    if pagenr and isinstance(pagenr, types.StringTypes):
        pagenr = pagenr.strip()
    if text:
        text = text.strip()
    if length is None and text:
        length = len(text.split())

    q = {
        'date': date,
        'length': length,
        'metastring': fullmeta,
        'headline': headline,
        'byline': byline,
        'section': section,
        'pagenr': pagenr,
        'batchid': batchid,
        'mediumid': source,
        'url': url,
        'externalid': externalid,
        'encoding': encoding,
        # We don't store the parentUrl. Instead, we use the articles_postsings
        # table to store this information. This is done by the scraper class.
    }
    aid = db.insert('articles', q)

    # The body text is encoded separately; its encoding is stored with the
    # 'texts' row rather than the 'articles' row.
    text, encoding = dbtoolkit.encodeText(text)

    q = {
        'articleid': aid,
        'type': texttype,
        'encoding': encoding,
        'text': text
    }
    db.insert('texts', q, retrieveIdent=0)

    if retrieveArticle:
        return article.Article(db, aid)
def _default_json(obj):
    """
    Convert obj to a string-like value.

    datetime.datetime instances are formatted through writeDateTime with
    year=True, seconds=False, time=False; every other object is converted
    with unicode(). (Judging by the name, presumably meant as a fallback
    serializer for JSON encoding -- confirm against callers.)
    """
    if isinstance(obj, datetime.datetime):
        return writeDateTime(obj, year=True, seconds=False, time=False)
    return unicode(obj)
Beispiel #4
0
 def test_dateoutput(self):
     for date, iso, isotime, yw, ym, yq in (
         (datetime.datetime(1990, 1, 10, 13,1,0), "1990-01-10", "1990-01-10 13:01:00", 1990.02, 1990.01, 1990.1),
         ):
         self.assertEqual(toolkit.writeDate(date), iso)
         self.assertEqual(toolkit.writeDateTime(date), isotime)