Example #1
0
    def parse_results(self):
        """Download and store the media files of every page of results.

        For each record in ``self.results`` the target collection is looked
        up through ``SOURCES`` (keyed by the record's ``source``), the file at
        ``source_url`` is downloaded, and it is saved as a ``Document`` when
        ``media_type`` is ``'file'`` or as an ``Image`` when it is
        ``'image'``. Records with any other media type are downloaded but not
        stored.

        A record whose download fails (missing URL, request error, or an
        unsuccessful response) is reported on stdout and appended to
        ``importer/log/import_media_files.txt``; the import then carries on
        with the next record.

        While ``self.next`` is set, the method sleeps for
        ``self.sleep_between_fetches`` seconds, calls
        ``self.fetch_url(self.next)`` and processes the following page.
        Pages are walked with a loop rather than by recursion so that the
        call stack does not grow with the number of pages.
        NOTE(review): this relies on ``fetch_url`` refreshing both
        ``self.results`` and ``self.next`` -- confirm against its definition.

        Lookup errors for the source or the collection are not caught here.

        Returns:
            A 2-tuple: the combined ``Document`` and ``Image`` object counts
            (counts of everything stored, not only of this run) and ``0``.
        """
        log_path = 'importer/log/import_media_files.txt'

        while True:
            for r in self.results:
                sub_site = r.get('source')
                collection_name = SOURCES[sub_site]
                collection = Collection.objects.get(name=collection_name)
                source_url = r.get('source_url')
                media_type = r.get('media_type')
                # A blank title causes an error on save, so use a placeholder.
                title = r.get('title') or 'No title was available'

                # A missing URL or a failed request is handled exactly like an
                # unsuccessful response: logged below, then skipped.
                response = None
                if source_url:
                    try:
                        response = requests.get(source_url)
                    except requests.RequestException:
                        response = None

                # A requests Response is falsy when its status is an error.
                if not response:
                    sys.stdout.write(
                        '⚠️ Got no response. Error has been logged importer/log/import_media_files.txt\n'
                    )
                    # Explicit encoding so non-ASCII record content cannot
                    # fail on platforms with a narrow default encoding.
                    with open(log_path, 'a', encoding='utf-8') as the_file:
                        the_file.write('{}\n'.format(r))
                    continue

                media_name = source_url.split('/')[-1]

                if media_type == 'file':  # save to documents
                    media_file = File(BytesIO(response.content),
                                      name=media_name)
                    file = Document(title=title,
                                    file=media_file,
                                    collection=collection)
                    # Saved twice on purpose: presumably created_at is filled
                    # in automatically on the first save, so the source date
                    # can only be applied afterwards -- confirm against the
                    # model definition.
                    file.save()
                    file.created_at = r.get('date')
                    file.save()

                elif media_type == 'image':  # save to images
                    image_file = ImageFile(BytesIO(response.content),
                                           name=media_name)
                    image = Image(title=title,
                                  file=image_file,
                                  collection=collection)
                    # Same two-step save as for documents above.
                    image.save()
                    image.created_at = r.get('date')
                    image.save()

            if not self.next:
                break

            # Be polite to the remote API between page fetches.
            time.sleep(self.sleep_between_fetches)
            self.fetch_url(self.next)

        return Document.objects.count() + Image.objects.count(), 0
Example #2
0
    def make_documents(self):

        if self.document['type_of_publication'] == 'heading':
            return self.create_heading(self.document['heading_text'])

        elif self.document['type_of_publication'] == 'document':
            document = self.document['document']
            if document:
                # lets get the file here, saves cluttering the block builder
                response = requests.get(document['url'])
                if response:
                    media_file = File(
                        BytesIO(response.content),
                        name=document['filename']
                    )
                    file = Document(
                        title=document['title'],
                        file=media_file,
                        collection=self.collection
                    )
                    file.save()
                    file.created_at = make_aware(
                        dateutil.parser.parse(document['date']))
                    file.save()
                    return self.create_document_type(file, document, self.document)
                else:
                    with open('importer/log/make_documents_list_errors.txt', 'a') as the_file:
                        the_file.write('{}: {}\n'.format(
                            self.publication, self.publication.id))

        elif self.document['type_of_publication'] == 'documentlink':
            # pass
            # document = self.data['document']
            return self.create_link_type(self.document)

        elif self.document['type_of_publication'] == 'audiovideo':
            # pass
            # document = self.data['document']
            return self.create_embed_type(self.document)

        elif self.document['type_of_publication'] == 'freetext':
            return self.create_free_text(self.document)

        # return self.stream_value

        """