예제 #1
0
def load_images_to_db(path):
    """Store every directory below *path* as a label and its entries as images.

    The tree rooted at *path* is traversed with ``os.walk``. Each
    subdirectory found at any depth is fetched-or-created as a ``Label``
    named after the directory's base name. Every entry returned by
    ``os.listdir`` for that directory (no filtering is applied) is then
    fetched-or-created as an ``Image`` keyed by its absolute path and the
    label.

    Args:
        path: Root directory whose subdirectories are scanned.
    """
    for parent_dir, child_dirs, _unused_files in os.walk(path):
        for label_name in child_dirs:
            label_dir = os.path.join(parent_dir, label_name)

            # get_or_create returns a sequence whose first item is the row.
            label = Label.get_or_create(name=label_name)[0]
            label.save()

            for entry in os.listdir(label_dir):
                entry_path = os.path.abspath(os.path.join(label_dir, entry))
                image = Image.get_or_create(path=entry_path, label=label)[0]
                image.save()
예제 #2
0
파일: scrape.py 프로젝트: julians/zcraper
def get_article_data(unique_id):
    """Parse the document behind *unique_id* and persist it as an ``Aufmacher``.

    *unique_id* is handed unchanged to ``untangle.parse`` and is also stored
    as the ``unique_id`` of the created ``Aufmacher``. The titles, the
    optional lead image and the ``date_first_released`` attribute are read
    from the parsed document. A ``TweetJob`` referencing the new
    ``Aufmacher`` is created as well. Author data is not extracted or
    persisted by this function.

    Args:
        unique_id: Value passed to ``untangle.parse``; also used as the
            record's ``unique_id``.

    Returns:
        The newly created ``Aufmacher`` instance.

    Raises:
        AttributeError: Propagated from the final ``video`` lookup when the
            parsed document has none of the ``article``, ``link``,
            ``gallery`` or ``video`` children, or from any later element
            access that is not guarded below.
    """
    document = untangle.parse(unique_id)

    # The content root can be one of several element kinds; use the first one
    # that exists. The last candidate ("video") is read without a guard so
    # that its AttributeError reaches the caller when nothing matches.
    for kind in ("article", "link", "gallery"):
        try:
            article = getattr(document, kind)
        except AttributeError:
            continue
        break
    else:
        article = document.video

    head, body = article.head, article.body

    supertitle = body.supertitle.cdata.strip()

    # ``body.title`` may come back as a list; only the first entry is used.
    title_node = body.title
    if isinstance(title_node, list):
        title_node = title_node[0]
    title = title_node.cdata.strip()

    subtitle = body.subtitle.cdata.strip()

    # Any AttributeError while reading the image fields discards the image
    # altogether: ``image_id`` becomes None and no Image row is looked up.
    try:
        image_node = head.image
        image_id = image_node["base-id"].strip()
        image_copyright = image_node.copyright.cdata.strip()
        image_caption = image_node.bu.cdata.strip()
    except AttributeError:
        image_id = None

    # No early exit: if several attributes carry the name, the last one wins.
    first_released = None
    for head_attribute in head.attribute:
        if head_attribute["name"] != "date_first_released":
            continue
        first_released = arrow.get(head_attribute.cdata).datetime

    article_image = None
    if image_id:
        image_defaults = {
            "copyright": image_copyright,
            "caption": image_caption,
        }
        article_image = Image.get_or_create(
            unique_id=image_id,
            defaults=image_defaults,
        )[0]

    aufmacher = Aufmacher.create(
        unique_id=unique_id,
        supertitle=supertitle,
        title=title,
        subtitle=subtitle,
        first_released=first_released,
        image=article_image,
    )

    TweetJob.create(aufmacher=aufmacher)

    return aufmacher