def build(request):
    """Refresh the stored documents for a directory, then rebuild via ``ui.build``.

    Reads the ``path`` query parameter, fetches or creates the matching
    ``Directory`` and publishes it through the module-level ``CURRENT_DIR``.
    When ``utils.get_documents(path)`` differs from the filenames stored for
    that directory, the caches are dropped and the directory's ``Document``
    rows are replaced by one row per ``.pdf``/``.txt`` entry found on disk.
    For each entry the title is at most the first 140 characters of the first
    line and the content is at most the next 280 characters.

    The session is cleared on every call, whether or not the build succeeded.

    Returns:
        HttpResponse whose body is the elapsed time in seconds (rounded to two
        decimals) when ``ui.build(path)`` reports success, or ``-1`` when it
        reports failure or any exception was raised.
    """
    global CURRENT_DIR
    # Local import: the file's import block is not visible from this chunk.
    import logging

    started = time.time()
    # noinspection PyBroadException
    try:
        path = request.GET.get('path')
        directory, created = Directory.objects.get_or_create(path=path)
        CURRENT_DIR = directory
        if created:
            # NOTE(review): get_or_create() already persists a new row, so
            # this extra save looks redundant; kept to preserve behaviour.
            directory.save()

        path_docs = utils.get_documents(path)
        db_docs = {doc.filename for doc in directory.document_set.only('filename')}
        if path_docs != db_docs:
            utils.remove_caches(path)

            # Snapshot the listing and visit PDFs first. iterdir() yields
            # entries in arbitrary order, so a ".txt" seen before its ".pdf"
            # used to be indexed twice (once per extension). With PDFs first,
            # generated_docs is complete before any ".txt" is examined.
            entries = sorted(
                pathlib.Path(path).iterdir(),
                key=lambda entry: not entry.name.endswith('.pdf'),
            )

            bulk = []
            generated_docs = set()
            for doc in entries:
                doc_path = str(doc)
                if doc.name.endswith('.pdf'):
                    # presumably fix_pdf writes the sibling ".txt" read below
                    # -- TODO confirm against utils.
                    utils.fix_pdf(doc_path)
                    doc_path = doc_path[:-4] + '.txt'
                    generated_docs.add(doc_path[:-4])
                elif not doc.name.endswith('.txt') or doc_path[:-4] in generated_docs:
                    # Neither a PDF nor a text file, or a text file already
                    # covered by its PDF.
                    continue

                with open(doc_path) as file:
                    title = file.readline(140)
                    content = file.read(280)

                # The name is known to end in ".pdf" or ".txt", so the last
                # dot always exists.
                stem, _, suffix = doc.name.rpartition('.')
                bulk.append(Document(
                    directory=directory,
                    filename=stem,
                    extension='.' + suffix,
                    title=title,
                    content=content,
                ))

            directory.document_set.all().delete()
            Document.objects.bulk_create(bulk)

        success = ui.build(path)
    except Exception:
        # Still answer with -1, but leave a trace instead of hiding the cause.
        logging.getLogger(__name__).exception('build failed')
        CURRENT_DIR = None
        success = False

    request.session.clear()
    return HttpResponse(round(time.time() - started, 2) if success else -1)
def list(request):
    """Show the document list page and accept a file upload on POST.

    A valid POST stores the uploaded ``docfile`` as a new ``Document`` and
    redirects back to this view. A GET, or an invalid POST, renders
    ``engine/list.html`` with all documents and the (possibly bound) form.
    """
    if request.method != 'POST':
        # Nothing submitted: start from an empty, unbound form.
        form = DocumentForm()
    else:
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            uploaded = Document(docfile=request.FILES['docfile'])
            uploaded.save()
            # Redirect after a successful POST back to the document list.
            return HttpResponseRedirect(reverse('engine.views.list'))

    # Template context: every stored document plus the form to display.
    page_context = {
        'documents': Document.objects.all(),
        'form': form,
    }
    return render_to_response(
        'engine/list.html',
        page_context,
        context_instance=RequestContext(request)
    )
def display_cluster_engine(request): algorithms, datasets = database_manager.retrieve_interface_data() upload_file_name = "" # Handle file upload if request.method == 'POST': form = DocumentForm(request.POST, request.FILES) if form.is_valid(): # remove whatever is in there now = datetime.datetime.now() if now.month < 10: mth = str(0)+str(now.month) else: mth = str(now.month) if now.day < 10: day = str(0)+str(now.day) else: day = str(now.day) path = '/static_media_clustapp/documents/'+str(now.year)+'/'+mth+'/'+day+'/' full_path = os.path.realpath('.') os.system('rm '+full_path+path+'*') # store uploaded file newdoc = Document(docfile = request.FILES['docfile']) newdoc.save() upload_file_name = request.FILES['docfile'] # debugging use, output the newest file uploaded print 'Newest File Uploaded' os.system('ls '+full_path+path) # Redirect to the document list after POST #return HttpResponseRedirect(reverse('engine.views.display_cluster_engine')) else: form = DocumentForm() # A empty, unbound form # Load documents for the list page documents = Document.objects.all() # Render list page with the documents and the form return render_to_response( 'engine/engine_interface.html', { 'documents': documents, 'form': form, 'algorithms':algorithms, 'datasets': datasets, 'upload':upload_file_name, }, context_instance=RequestContext(request) ) """
def handle(self, *args, **options):
    """Seed five sample documents together with their pages.

    For every document in the table below, a ``Document`` is built, then for
    each page the first line of the page's ``-text.txt`` and
    ``-response.json`` resource files is read and wrapped in a ``Page`` and
    an ``ElasticPage``. The document is saved with its pages attached, then
    each ``ElasticPage`` is saved. Documents are processed in table order.

    Resource paths are relative to the current working directory.
    """
    storage_root = 'https://storage.yandexcloud.net/pdf-storage'
    # "resourses" (sic) is the directory name on disk.
    resource_root = './engine/management/commands/resourses'

    # (document name, file stem, separator before the page index in the
    #  per-page PDF URL, number of pages)
    catalogue = (
        ('NUP4114', 'NUP4114', '_page_', 7),
        ('Apem_06172019_Q14', 'Apem_06172019_Q14_Series-1605261', '-', 8),
        ('D_3120-N_ENG', 'D_3120-N_ENG-1525402', '-', 12),
        ('ile-1553267814', 'ile-1553267814-1588925', '-', 2),
        ('steval-spin3204', 'steval-spin3204-1602123', '-', 9),
    )

    for doc_name, stem, separator, page_count in catalogue:
        document = Document(name=doc_name, url=f'{storage_root}/{stem}.pdf')
        page_records = []
        search_records = []

        for index in range(page_count):
            number = index + 1  # pages are numbered from 1, files from 0
            page_url = f'{storage_root}/{stem}{separator}{index}.pdf'
            resource_prefix = f'{resource_root}/{stem}/{stem}-{index}'

            # Only the first line of each resource file is used.
            with open(f'{resource_prefix}-text.txt') as text_file:
                page_text = text_file.readline()
            with open(f'{resource_prefix}-response.json') as vision_file:
                page_vision = vision_file.readline()

            page_records.append(
                Page(url=page_url,
                     num=number,
                     text=page_text,
                     vision=page_vision,
                     tables=[]))
            search_records.append(
                ElasticPage(name=f'{doc_name}-{number}',
                            url=page_url,
                            num=number,
                            doc_name=doc_name,
                            document=document,
                            text=page_text))

        document.pages = page_records
        document.save()
        for record in search_records:
            record.save()