def parse_results(self):
    """Download every media file in the fetched results and store it.

    Each entry of ``self.results`` is read through ``.get`` for the keys
    ``source``, ``source_url``, ``media_type``, ``title`` and ``date``.
    Entries whose ``media_type`` is ``'file'`` are saved as ``Document``
    objects, entries whose ``media_type`` is ``'image'`` are saved as
    ``Image`` objects, and any other media type is downloaded but not
    stored.

    A record that cannot be downloaded (no ``source_url``, a network
    error, a timeout, or a falsy response) is reported on stdout and
    appended to ``importer/log/import_media_files.txt``; the import then
    carries on with the next record.

    While ``self.next`` is truthy the method sleeps for
    ``self.sleep_between_fetches`` seconds, calls
    ``self.fetch_url(self.next)`` and processes the next page.
    NOTE(review): this assumes ``fetch_url`` replaces ``self.results``
    and ``self.next`` -- confirm against its definition.

    Returns:
        A 2-tuple: the combined count of all ``Document`` and ``Image``
        objects, followed by a literal ``0`` (its meaning is not visible
        from this method).

    Raises:
        Whatever ``SOURCES[...]`` and ``Collection.objects.get`` raise
        for an unknown source; those lookups are deliberately left
        unguarded, as in the original.
    """
    error_log_path = 'importer/log/import_media_files.txt'
    # requests waits forever by default; bound the wait so one stalled
    # server cannot hang the whole import.
    request_timeout = 60

    # Pages are walked with a loop rather than by self-recursion so a long
    # result set cannot exhaust the interpreter's recursion limit.
    while True:
        for record in self.results:
            collection_name = SOURCES[record.get('source')]
            collection = Collection.objects.get(name=collection_name)
            source_url = record.get('source_url')
            media_type = record.get('media_type')
            # a blank title causes an error, so fall back to a placeholder
            title = record.get('title') or 'No title was available'

            response = None
            if source_url:
                try:
                    response = requests.get(
                        source_url, timeout=request_timeout)
                except requests.RequestException:
                    # handled exactly like an unsuccessful response below
                    response = None

            if not response:
                sys.stdout.write(
                    '⚠️ Got no response. Error has been logged importer/log/import_media_files.txt\n'
                )
                with open(error_log_path, 'a', encoding='utf-8') as log_file:
                    log_file.write('{}\n'.format(record))
                continue

            media_name = source_url.split('/')[-1]
            payload = BytesIO(response.content)

            if media_type == 'file':
                stored = Document(
                    title=title,
                    file=File(payload, name=media_name),
                    collection=collection,
                )
            elif media_type == 'image':
                stored = Image(
                    title=title,
                    file=ImageFile(payload, name=media_name),
                    collection=collection,
                )
            else:
                # other media types are not stored
                continue

            # Saved twice on purpose: created_at is only assigned after the
            # first save (presumably the model fills it automatically on
            # creation -- TODO confirm).
            stored.save()
            stored.created_at = record.get('date')
            stored.save()

        if not self.next:
            break

        time.sleep(self.sleep_between_fetches)
        self.fetch_url(self.next)

    return Document.objects.count() + Image.objects.count(), 0
def make_documents(self): if self.document['type_of_publication'] == 'heading': return self.create_heading(self.document['heading_text']) elif self.document['type_of_publication'] == 'document': document = self.document['document'] if document: # lets get the file here, saves cluttering the block builder response = requests.get(document['url']) if response: media_file = File( BytesIO(response.content), name=document['filename'] ) file = Document( title=document['title'], file=media_file, collection=self.collection ) file.save() file.created_at = make_aware( dateutil.parser.parse(document['date'])) file.save() return self.create_document_type(file, document, self.document) else: with open('importer/log/make_documents_list_errors.txt', 'a') as the_file: the_file.write('{}: {}\n'.format( self.publication, self.publication.id)) elif self.document['type_of_publication'] == 'documentlink': # pass # document = self.data['document'] return self.create_link_type(self.document) elif self.document['type_of_publication'] == 'audiovideo': # pass # document = self.data['document'] return self.create_embed_type(self.document) elif self.document['type_of_publication'] == 'freetext': return self.create_free_text(self.document) # return self.stream_value """