def handle(self, *args, **options):
    """
    Walk ``options["input_dir"]`` recursively and create a Document row for
    every file not already known, deduplicating by SHA1 hash.

    Already-known files are reported on stderr and skipped; new files are
    stored with a human-readable Ukrainian name derived from the file name.
    """
    # Hard-coded uploader account for all imported documents.
    peklun = User.objects.get(username="******")
    for root, dirs, files in os.walk(options["input_dir"]):
        for f in files:
            path = os.path.join(root, f)
            if not os.path.isfile(path):
                continue

            fname, ext = os.path.splitext(f)
            # Collapse runs of whitespace/underscores into single spaces
            # for the display name.
            human_name = re.sub(r"[\s_]+", " ", fname.decode("utf-8"))
            doc_san_name = f.decode("utf-8")

            with open(path, "rb") as fp:
                doc_content = fp.read()

            # BUGFIX: hash the file *content*, not the file name, so the
            # dedup check matches zip_import() and detects identical files
            # stored under different names.
            doc_hash = sha1(doc_content).hexdigest()

            try:
                doc_instance = Document.objects.get(hash=doc_hash)
                self.stderr.write('Skipping file {}'.format(doc_san_name))
            except Document.DoesNotExist:
                self.stdout.write('Adding file {}'.format(doc_san_name))
                if doc_san_name:
                    doc_instance = Document(
                        name_uk=human_name,
                        uploader=peklun,
                        hash=doc_hash)
                    # .doc.save() stores the file; then persist the model row.
                    doc_instance.doc.save(doc_san_name, ContentFile(doc_content))
                    doc_instance.save()
def form_valid(self, form):
    '''
    Attach an uploaded video (if any) to a new Document.

    :returns: *"Media"* is assigned to an external URL if it's filled,
    otherwise it points to the uploaded file.
    '''
    try:
        instance = Document(file_upload=self.request.FILES['upload_video'])
        instance.save()
        if not form.instance.media:
            form.instance.media = instance.accessURL
    # Best-effort: a missing upload (KeyError) or a storage failure must not
    # block form submission.  BUGFIX: narrowed from a bare ``except`` so
    # SystemExit / KeyboardInterrupt are no longer swallowed.
    except Exception:
        pass
    return super(OverdubUpdateView, self).form_valid(form)
def form_valid(self, form):
    '''
    Attach an uploaded video (if any) to a new Document.

    :returns: *"Media"* is assigned to an external URL if it's filled,
    otherwise it points to the uploaded file.
    '''
    try:
        instance = Document(file_upload=self.request.FILES['upload_video'])
        instance.save()
        if not form.instance.media:
            form.instance.media = instance.accessURL
    # Best-effort: a missing upload (KeyError) or a storage failure must not
    # block form submission.  BUGFIX: narrowed from a bare ``except`` so
    # SystemExit / KeyboardInterrupt are no longer swallowed.
    except Exception:
        pass
    return super(OverdubUpdateView, self).form_valid(form)
def form_valid(self, form):
    """
    Bind the new activity to its collection and type, then attach an
    uploaded video (if any) as the form's media URL.
    """
    # Auto set the following fields:
    form.instance.collection = get_object_or_404(
        ActivityCollection, pk=self.kwargs['pk'])
    form.instance.activity_type = self.activity_type
    try:
        instance = Document(file_upload=self.request.FILES['upload_video'])
        instance.save()
        if not form.instance.media:
            form.instance.media = instance.accessURL
    # Best-effort: a missing upload (KeyError) or a storage failure must not
    # block form submission.  BUGFIX: narrowed from a bare ``except`` so
    # SystemExit / KeyboardInterrupt are no longer swallowed.
    except Exception:
        pass
    return super(OverdubCreateView, self).form_valid(form)
def form_valid(self, form):
    """
    Bind the new activity to its collection and type, then attach an
    uploaded video (if any) as the form's media URL.
    """
    # Auto set the following fields:
    form.instance.collection = get_object_or_404(ActivityCollection,
                                                 pk=self.kwargs['pk'])
    form.instance.activity_type = self.activity_type
    try:
        instance = Document(file_upload=self.request.FILES['upload_video'])
        instance.save()
        if not form.instance.media:
            form.instance.media = instance.accessURL
    # Best-effort: a missing upload (KeyError) or a storage failure must not
    # block form submission.  BUGFIX: narrowed from a bare ``except`` so
    # SystemExit / KeyboardInterrupt are no longer swallowed.
    except Exception:
        pass
    return super(OverdubCreateView, self).form_valid(form)
def createDocument(self, request):
    """Create a Document named by the POSTed ``name`` and report the outcome."""
    name = request.POST.get('name', None)
    if not name:
        # No name supplied — nothing to create.
        return {
            'status': 'failure',
            'error': "Please enter the document name",
        }
    document = Document(name=name)
    document.save()
    return {
        'status': 'success',
        'data': 'Document added successfully',
    }
def getAllDocuments(self, request):
    """
    Return every Document serialized to plain data.

    :returns: dict with ``status`` and, on success, ``data`` holding the
        JSON-decoded serialization of all documents.
    """
    documents = Document.getAllDocuments()
    serialized_obj = serializers.serialize('json', documents)
    # BUGFIX: ``result`` used to be assigned only inside the ``if`` branch,
    # so a falsy serialization would raise UnboundLocalError at ``return``.
    result = {'status': 'failure', 'error': 'Could not serialize documents'}
    if serialized_obj:
        serialized_obj = json.loads(serialized_obj)
        result = {'status': 'success', 'data': serialized_obj}
    return result
def list(request):
    """Show every stored document and handle new uploads via DocumentForm."""
    if request.method == 'POST':
        # Handle file upload
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            Document(docfile=request.FILES['docfile']).save()
            # Redirect to the document list after POST
            return HttpResponseRedirect(reverse('core:list'))
    else:
        form = DocumentForm()  # An empty, unbound form

    # Render the list page with the documents and the (possibly bound) form.
    return render(request, 'core/list.html',
                  {'documents': Document.objects.all(), 'form': form})
def read(self, document_name, options):
    """
    Reads Document() data from DMS.

    Method creates, instantiates and populates a new Document() object
    using the name and/or search filter criteria provided.  Currently it
    can read a Document() with a file object attached, or read only the
    file-info data.

    :param document_name: name of the document to look up.
    :param options: dict of operation options passed to the DMS layer.
    :returns: a Document() instance; on an invalid name it is returned
        early with the error recorded in ``self.errors``.
    """
    log.debug("READ Document %s with options: %s" % (document_name, options))
    doc = Document()
    # NOTE(review): ``operator`` is constructed but never used in this
    # visible span — presumably the rest of the method (not shown here)
    # drives plugins through it; confirm before removing.
    operator = PluginsOperator()
    # Checking if name really possible in current DMS config.
    try:
        doc.set_filename(document_name)
    except DmsException, e:  # Python 2 except syntax (legacy codebase).
        # Invalid name: record the error and return the bare Document.
        self.errors.append(unicode(e.parameter))
        return doc
    # NOTE(review): trailing ``pass`` is a no-op; the method appears
    # truncated at this point in the provided source.
    pass
def setUpDatebase():
    """
    Seed the test database with one Document built from sample.po and
    return the parsed polib.POFile for it.

    If any Document already exists, the first one's stored PO file is
    parsed and returned instead.
    """
    Document = cm.Document
    if Document.objects.all().count() == 0:
        # The NamedTemporaryFile must stay open for the rest of this
        # branch: closing it deletes the file, and polib.pofile() below
        # reads it back from disk by name.
        samplePo = tempfile.NamedTemporaryFile()
        with open("./core/testUtils/sample.po") as fp:
            samplePo.write(bytes(fp.read(), 'utf-8'))
        samplePo.seek(0)
        FileObj = File(samplePo, name='cvpcb')
        docobj = Document(Name='cvpcb', PoFile=FileObj)
        docobj.save()
        PoFile: File = docobj.PoFile
        # Path of the temp file we just wrote (not the stored copy).
        fileName = samplePo.name
        # Re-binds PoFile from the Django File to the parsed polib.POFile.
        PoFile: polib.POFile = polib.pofile(fileName)
        for entry in PoFile:
            # Persist each PO entry; Raw keeps the pickled polib entry so
            # it can be reconstructed verbatim later.
            POEntry(Raw=pickle.dumps(entry),
                    Doc=docobj,
                    Translated=entry.translated(),
                    Msgid=entry.msgid,
                    Msgstr=entry.msgstr).save()
        return PoFile
    else:
        # Database already seeded: parse the stored PO file of the first
        # Document.  (Rebinding ``Document`` to an instance is intentional
        # in the original, if confusing.)
        Document = Document.objects.all()[0]
        return polib.pofile(Document.PoFile.file.name)
def handle(self, *args, **opt):
    """
    Import (or update) a gettext .po file into the database.

    Creates a Document named after --poName (or the file's basename) and
    one POEntry row per entry in the file.  With ``isUpdate`` set, the
    document's existing entries are deleted and re-imported; without it,
    an already-present document aborts the command.
    """
    # Map --verbosity to a log level (was an elif chain).
    levels = {0: logging.CRITICAL, 1: logging.ERROR,
              2: logging.INFO, 3: logging.DEBUG}
    if opt['verbosity'] in levels:
        logging.basicConfig(level=levels[opt['verbosity']])

    if opt['poName'] is None:  # BUGFIX: was ``== None``
        BaseName = os.path.splitext(os.path.basename(opt['file']))[0]
    else:
        BaseName = opt['poName']

    PoFile = polib.pofile(opt['file'])

    docObj = None
    # Check whether this doc has been inserted or not.
    if Document.objects.filter(Name__exact=BaseName).count() > 0:
        docObj = Document.objects.get(Name__exact=BaseName)
        if not opt['isUpdate']:
            logging.warning("PO文件已存在,跳过添加")
            sys.exit(0)

    if docObj is None:  # BUGFIX: was ``== None``
        # Construct a new doc obj.  BUGFIX: the source file is now opened
        # only when actually needed and is closed via ``with`` (it used to
        # be opened unconditionally and leaked).
        docObj = Document()
        docObj.Name = BaseName
        with open(opt['file'], 'r') as fh:
            docObj.PoFile = File(fh, name=BaseName)
            docObj.save()

    # Update: drop the document's old entries before re-inserting.
    if opt['isUpdate']:
        queryset = POEntry.objects.filter(Doc__Name__exact=docObj.Name)
        logging.info("删除原有条目(total:%d)" % (len(queryset)))
        queryset.delete()

    total = len(PoFile)
    idx = 0
    # One transaction for the whole import keeps it atomic and fast.
    with transaction.atomic():
        for entry in PoFile:
            idx += 1
            logging.debug("添加翻译条目(%d/%d)" % (idx, total))
            POEntry(Raw=pickle.dumps(entry),
                    Doc=docObj,
                    Translated=entry.translated(),
                    Msgid=entry.msgid,
                    Msgstr=entry.msgstr).save()
def zip_import(self, request): if request.method == "GET": return render( request, "admin/core/document/zip_import.html", {"form": ZIPImportForm()}, ) if request.method == "POST": form = ZIPImportForm(request.POST, request.FILES) if not form.is_valid(): return render(request, "admin/core/document/zip_import.html", {"form": form}) with ZipFile(request.FILES["zipfile"]) as zip_arch: for finfo in zip_arch.infolist(): fullname = finfo.filename if fullname.endswith("/"): continue _, fname = os.path.split(fullname) if fname.startswith("._"): continue if finfo.file_size < 2048: continue try: dec_fname = unicode(fname) except UnicodeDecodeError: dec_fname = fname.decode("cp866") fname, ext = os.path.splitext(dec_fname) doc_san_name = fname.decode("utf-8") human_name = re.sub(r"[\s_]+", " ", doc_san_name) doc_san_name = "{}.{}".format(doc_san_name, ext) with zip_arch.open(fullname, "r") as fp: doc_content = fp.read() doc_hash = sha1(doc_content).hexdigest() try: doc_instance = Document.objects.get(hash=doc_hash) self.message_user( request, "Skipping file {} as it already exists".format( doc_san_name), level=messages.WARNING, ) except Document.DoesNotExist: self.message_user( request, "Adding file {}".format(doc_san_name)) if doc_san_name: doc_instance = Document(name_uk=human_name, uploader=request.user, hash=doc_hash) doc_instance.doc.save(doc_san_name, ContentFile(doc_content)) doc_instance.guess_doc_type() return redirect(reverse("admin:core_document_changelist"))
def handle(self, *args, **options): peklun = User.objects.get(username="******") wks = get_spreadsheet().sheet1 for i, l in enumerate(wks.get_all_records()): # reopen it time from time to avoid disconnect by timeout if i % 2000 == 0 and i: wks = get_spreadsheet().sheet1 self.stdout.write('Processing line #{}'.format(i)) company_ipn = l.get("ІПН", "") company_name = l.get("Назва", "") person_id = l.get("id персони", "") company_id = l.get("id компанії", "") photo_url = l.get("Фото", "") person = None # First let's search for appropriate company company = self.process_company(company_id, company_ipn, company_name) # No company — no go if company is None: continue # Let's backwrite company id to the spreadsheet for further use if company.pk != company_id: company_id = company.pk wks.update_cell(i + 2, len(l.keys()), company.pk) person_name = l.get("ПІБ", "").strip() position = l.get("Посада", "").strip() person_dob = unicode(l.get("Дата народження", "")).strip() person_from = parse_date(l.get("Дата призначення", "")) person_to = parse_date(l.get("Дата звільнення", "")) doc_received = parse_date(l.get("Дата відповіді", "")) docs = l.get("Лінк на відповідь", "").strip() website = l.get("лінк на сайт", "").strip() # Now let's search for the person if person_name: last_name, first_name, patronymic, _ = parse_fullname( person_name) if not last_name: continue # First we search by person_id (if it's present) if person_id: try: person = Person.objects.get(pk=person_id) except Person.DoesNotExist: pass # If nothing is found we search for name (for now) if not person: try: person = Person.objects.get( first_name_uk__iexact=first_name, last_name_uk__iexact=last_name, patronymic_uk__iexact=patronymic) except Person.MultipleObjectsReturned: self.stderr.write( "Double person {}!".format(person_name)) except Person.DoesNotExist: pass # If nothing is found, let's create a record for that person if not person: person = Person() self.stderr.write( "Created new person 
{}".format(person_name)) person.first_name_uk = first_name person.last_name_uk = last_name person.patronymic_uk = patronymic Ua2RuDictionary.objects.get_or_create(term=first_name) Ua2RuDictionary.objects.get_or_create(term=last_name) Ua2RuDictionary.objects.get_or_create(term=patronymic) person.first_name_en = translitua(first_name) person.last_name_en = translitua(last_name) person.patronymic_en = translitua(patronymic) person.is_pep = True person.imported = True person.type_of_official = 1 # Parsing date (can be a full date or just a year or # year/month) if person_dob: person.dob = parse_date(person_dob) if len(person_dob) == 4: person.dob_details = 2 # Only year if len(person_dob) > 4 and len(person_dob) < 7: person.dob_details = 1 # month and year # Let's download the photo (if any) if not person.photo and photo_url: photo_name, photo_san_name, photo_content = download( photo_url, translitua(person_name)) if photo_name: person.photo.save(photo_san_name, ContentFile(photo_content)) else: self.stdout.write("Cannot download image %s for %s" % (photo_url, person_name)) person.save() # Let's write the person id back to the table. 
if person.pk != person_id: person_id = person.pk wks.update_cell(i + 2, len(l.keys()) - 1, person.pk) # Now let's download all supporting docs docs_downloaded = [] first_doc_name = False # There might be many of them for doc in docs.split(", "): doc_instance = None # we cannot download folders from google docs, so let's # skip them if doc and "folderview" not in doc \ and "drive/#folders" not in doc: doc = expand_gdrive_download_url(doc) doc_hash = sha1(doc).hexdigest() # Check, if docs try: doc_instance = Document.objects.get(hash=doc_hash) except Document.DoesNotExist: self.stdout.write( 'Downloading file {}'.format(doc)) doc_name, doc_san_name, doc_content = download(doc) doc_san_name = translitua(doc_san_name) if doc_name: doc_instance = Document(name_uk=doc_name, uploader=peklun, hash=doc_hash) doc_instance.doc.save(doc_san_name, ContentFile(doc_content)) doc_instance.save() else: self.stdout.write( 'Cannot download file {}'.format(doc)) if doc_instance: first_doc_name = doc_instance.name_uk docs_downloaded.append(doc_instance.doc.url) # Now let's setup links between person and companies links = Person2Company.objects.filter( (Q(date_established=person_from) | Q(date_established=mangle_date(person_from)) | Q(date_established__isnull=True)), (Q(date_finished=person_to) | Q(date_finished=mangle_date(person_to)) | Q(date_finished__isnull=True)), from_person=person, to_company=company) # Delete if there are doubling links # including those cases when dates were imported incorrectly # because of parse_date if len(links) > 1: links.delete() link, _ = Person2Company.objects.update_or_create( from_person=person, to_company=company, date_established=person_from, date_established_details=0, date_finished=person_to, date_finished_details=0) if not link.relationship_type: link.relationship_type = position # And translate them Ua2EnDictionary.objects.get_or_create( term=lookup_term(position)) # oh, and add links to supporting docs all_docs = docs_downloaded + 
website.split(", ") if all_docs: link.proof = ", ".join(filter(None, all_docs)) if first_doc_name: link.proof_title = first_doc_name link.date_confirmed = doc_received link.is_employee = True link.save()
def init_Document_with_data(self, options, doc=None, document_name=None, document_file=None): """Populate given Document() class with given properties from "options" provided Makes expansion of interaction methods with Document() simple. Expand this actions to add new interactions with Document() object... Connector between "options" passed to this CRUD manager and later Plugin() interactions. @options is a dict of operation options (that change behaviour of operations) @doc is a Document() instance @document_name is a name of a document being processed @document_file is a file object being processed """ if doc is None: doc = Document() # All methods sequence, besides create() if document_name: doc.set_filename(document_name) # Usually create() method sequence if document_file: doc.set_file_obj(document_file) if hasattr(document_file, "content_type"): doc.set_mimetype(document_file.content_type) if options: try: for property_name, value in options.iteritems(): if property_name == "hashcode": doc.set_hashcode(value) if property_name == "revision": doc.set_revision(value) # Run for plugins without retrieving document. Only count file info data. 
if property_name == "revision_count": doc.update_options({"revision_count": True, "only_metadata": True}) if property_name == "extension": doc.set_requested_extension(value) if property_name == "tag_string": if value: doc.set_tag_string(value) doc.update_options({property_name: value}) if property_name == "remove_tag_string": if value: doc.set_remove_tag_string(value) doc.update_options({property_name: value}) if property_name == "new_indexes": doc.update_db_info(value) if property_name == "user": doc.set_user(value) if property_name == "index_info": # Not to modify original data during workflow data = value.copy() doc.set_db_info(data) if property_name == "new_type": if value: doc.set_change_type(value) if property_name == "mark_revision_deleted": if value: doc.update_options({property_name: value}) if property_name == "delete_revision": if value: doc.update_options({property_name: value}) doc.set_revision(int(value)) if property_name in ["indexing_data", "thumbnail", "mark_deleted", "remove_thumbnails"]: if value: doc.update_options({property_name: True}) if property_name == "update_file": doc.set_file_obj(value) if value: # Option for update function so we know we have a file update sequence doc.update_options({property_name: True}) if "only_metadata" in options: doc.update_options({"only_metadata": options["only_metadata"]}) except Exception, e: self.errors.append("Error working with Object: %s" % e) log.error( "DocumentManager().init_Document_with_data() error: %s, doc: %s, options: %s, document_name: %s" % (e, doc, options, document_name) ) pass # Every method call should have a Django User inside. Validating that. if not "user" in options and not options["user"]: error = 'Wrong DocumentProcessor() method call. Should have a proper "user" option set' log.error(error) self.errors.append(error) raise DmsException(error, 500)
def get_upload_details(request):
    """
    Second step of the upload flow: on a valid POST, persist the
    previously-stashed session upload as a Document and queue PDF/PNG
    generation; then (re)render the details form pre-filled from Solr
    metadata extraction of the session file.

    NOTE(review): when no 'upload_file' is in the session this view
    returns None — presumably callers guarantee the session entry;
    confirm against the first upload step.
    """
    # Handle the file upload
    if request.method == 'POST':
        form = UploadDetailForm(request.POST)
        # Check the form is valid, and that we have a file to work with
        if form.is_valid() and request.session.get('upload_file', False):
            # Get the information that we stored on the file
            temp_file = request.session['upload_file']
            temp_file_sum = temp_file.md5_sum
            # Generate a document instance from the details the user provided
            new_document = Document(md5_sum=temp_file_sum)
            # Third arg False defers the model save until .save() below.
            new_document.file.save(temp_file.name, temp_file.file, False)
            new_document.title = form.cleaned_data['title']
            # Save the document and clean up temp files and stuff
            new_document.save()
            log.info("Saved a new document: %s", new_document.id)
            clean_temp_file(temp_file)
            del request.session['upload_file']
            # Create a PDF and pngs (async; pngs chained after the pdf)
            create_pdf.delay(new_document.id,
                             callback=create_pngs.subtask((new_document.id,)))
    if request.session.get('upload_file', False):
        temp_file = request.session['upload_file']
        data = {'filename': temp_file.name}
        try:
            try:
                # Ask Solr to extract text/metadata from the file.
                file_data = solr.extract(temp_file.file)
            except IOError, e:  # Python 2 except syntax (legacy codebase).
                log.warn('Solr extraction failed: %s', e)
                file_data = {'metadata': None}
            log.debug(file_data['metadata'])
            try:
                if file_data['metadata']:
                    data['title'] = (file_data['metadata']['title'][0]
                                     or data['filename'])
                    data['author'] = file_data['metadata']['Author'][0]
            except:
                # Metadata keys missing/malformed — fall back to filename.
                # NOTE(review): indentation in the original source was
                # ambiguous; this fallback is placed inside the except so
                # an extracted title is not clobbered — confirm intent.
                pass
                data['title'] = data['filename']
            details_form = UploadDetailForm(data)
            # Redirect the user to the upload information page after POST
            return render_to_response('upload/upload_details.html',
                                      {'form': details_form,
                                       'file_name': data['filename']},
                                      context_instance=RequestContext(request)
                                      )
        except SolrError:
            # Return an error message
            return render_to_response('upload/upload_details.html',
                                      {'error': 'Solr extraction error',
                                       'file_name': data['filename']},
                                      context_instance=RequestContext(request)
                                      )
def get(self, request, *args, **kwargs):
    """Render the page with all documents, the current user and divisions."""
    context = {
        'documents': Document.getAllDocuments(),
        'user': request.user,
        'divisions': Division.objects.all(),
    }
    return render(request, self.template_name, context)
def handle(self, *args, **options):
    """
    Rebuild the Document table from a glob of Assemblée JSON export files.

    Deletes every existing Document, then bulk-creates one row per
    ``document`` object found in the files matched by options["files"].
    """
    Document.objects.all().delete()
    documents = []
    # Loop variable renamed from ``file`` (shadowed the builtin).
    for path in glob.glob(options["files"]):
        # BUGFIX: the JSON file used to be opened via ``json.load(open(...))``
        # and never closed; ``with`` guarantees the handle is released.
        with open(path) as fh:
            json_dos = json.load(fh)['document']

        # Local aliases for the deeply nested sub-objects.
        chrono = json_dos['cycleDeVie']['chrono']
        famille = json_dos['classification']['famille']
        sous_type = json_dos['classification']['sousType']  # may be None
        imprimerie = json_dos['imprimerie']                 # may be None

        doc = Document(
            xsi_type=json_dos['@xsi:type'],
            uid=json_dos['uid'],
            legislature=json_dos.get('legislature'),
            cycleDeVie_chrono_dateCreation=chrono['dateCreation'],
            cycleDeVie_chrono_dateDepot=chrono['dateDepot'],
            cycleDeVie_chrono_datePublication=chrono['datePublication'],
            cycleDeVie_chrono_datePublicationWeb=chrono['datePublicationWeb'],
            denominationStructurelle=json_dos['denominationStructurelle'],
            provenance=json_dos.get('provenance'),
            titres_titrePrincipal=json_dos['titres']['titrePrincipal'],
            titres_titrePrincipalCourt=json_dos['titres']
            ['titrePrincipalCourt'],
            dossierRef=json_dos['dossierRef'],
            classification_famille_depot_code=famille['depot']['code'],
            classification_famille_depot_libelle=famille['depot']['libelle'],
            classification_famille_classe_code=famille['classe']['code'],
            classification_famille_classe_libelle=famille['classe']['libelle'],
            classification_famille_espece_code=famille.get(
                'espece', {}).get('code'),
            classification_famille_espece_libelle=famille.get(
                'espece', {}).get('libelle'),
            classification_type_code=json_dos['classification']['type']
            ['code'],
            classification_type_libelle=json_dos['classification']['type']
            ['libelle'],
            classification_sousType_code=sous_type['code']
            if sous_type else None,
            classification_sousType_libelle=sous_type.get('libelle')
            if sous_type else None,
            classification_sousType_libelleEdition=sous_type.get(
                'libelleEdition') if sous_type else None,
            classification_statutAdoption=json_dos['classification']
            ['statutAdoption'],
            correction=json_dos['correction'],
            notice_numNotice=json_dos['notice'].get('numNotice'),
            notice_formule=json_dos['notice'].get('formule'),
            notice_adoptionConforme=json_dos['notice']['adoptionConforme'],
            indexation=json_dos['indexation'],
            imprimerie_ISSN=imprimerie.get('ISSN') if imprimerie else None,
            imprimerie_ISBN=imprimerie.get('ISBN') if imprimerie else None,
            imprimerie_DIAN=imprimerie.get('DIAN') if imprimerie else None,
            imprimerie_nbPage=imprimerie.get('nbPage')
            if imprimerie else None,
            imprimerie_prix=imprimerie['prix'] if imprimerie else None,
        )
        documents.append(doc)
    print("creating", len(documents), "documents")
    Document.objects.bulk_create(documents)
def savedocument(request):
    """
    Create a Document from POSTed metadata plus optional file/thumbnail
    uploads, then redirect to the dashboard.

    The document file ('docs') and thumbnail ('thumb') are each attached
    only when actually present in request.FILES.  (The four copy-pasted
    branches of the original — doc+thumb / doc / thumb / neither — and
    its debug prints are collapsed into one construction.)
    """
    post_data = request.POST
    doc = request.FILES.get('docs')
    thumbnail = request.FILES.get('thumb')

    # Metadata common to every branch of the old code.
    fields = {
        'title': post_data['title'],
        'summary': post_data['summary'],
        'description': post_data['description'],
        'category': post_data['category'],
        'doctype': post_data['type'],
        'author': post_data['author'],
    }
    if doc:
        fields['document'] = doc
    if thumbnail:
        fields['thumbnail'] = thumbnail

    document = Document(**fields)
    document.save()
    return redirect('dashboard')