Example #1
0
def build(request):
    """Rebuild the document index for the directory given in ``?path=``.

    The directory record is fetched (or created) and remembered in the
    module-level ``CURRENT_DIR``.  When the documents reported by
    ``utils.get_documents(path)`` differ from the filenames stored for that
    directory, the caches are removed and all ``Document`` rows of the
    directory are replaced by freshly scanned ones.  Finally ``ui.build(path)``
    is run and the session is cleared.

    Returns:
        HttpResponse whose body is the elapsed time in seconds (rounded to two
        decimals) when ``ui.build`` reports success, or ``-1`` when it does not
        or when any exception was raised along the way.
    """
    import logging  # local import: the module-level import block is not visible here

    global CURRENT_DIR
    start = time.time()
    # noinspection PyBroadException
    try:
        path = request.GET.get('path')

        directory, created = Directory.objects.get_or_create(path=path)
        CURRENT_DIR = directory
        if created:
            directory.save()

        path_docs = utils.get_documents(path)
        db_docs = set(doc.filename for doc in directory.document_set.only('filename'))

        if path_docs != db_docs:
            utils.remove_caches(path)
            bulk = []
            generated_docs = set()

            # iterdir() yields entries in arbitrary order.  Handle every PDF
            # before any text file so that a '.txt' sibling of a PDF is always
            # recognised as already covered, whatever the listing order is.
            entries = sorted(
                pathlib.Path(path).iterdir(),
                key=lambda entry: not entry.name.endswith('.pdf'),
            )

            for doc in entries:
                doc_path = str(doc)
                stem_path = doc_path[:-4]  # path without the 4-char extension

                if doc.name.endswith('.pdf'):
                    # presumably fix_pdf writes '<stem>.txt' next to the PDF;
                    # that file is what gets read below -- confirm in utils
                    utils.fix_pdf(doc_path)
                    doc_path = stem_path + '.txt'
                    generated_docs.add(stem_path)

                elif not doc.name.endswith('.txt') or stem_path in generated_docs:
                    # Neither PDF nor text, or a text file belonging to a PDF
                    # that has already been indexed.
                    continue

                # Only a short preview is stored: up to 140 characters of the
                # first line as title, then up to 280 characters of content.
                with open(doc_path) as file:
                    title = file.readline(140)
                    content = file.read(280)

                # The name always contains a dot here (it ends in .pdf/.txt).
                stem, _, extension = doc.name.rpartition('.')
                bulk.append(Document(
                    directory=directory,
                    filename=stem,
                    extension='.' + extension,
                    title=title,
                    content=content
                ))
            directory.document_set.all().delete()
            Document.objects.bulk_create(bulk)

        success = ui.build(path)
    except Exception:
        # Callers only ever see -1, so keep the traceback in the log.
        logging.getLogger(__name__).exception('Index build failed')
        CURRENT_DIR = None
        success = False
    request.session.clear()
    return HttpResponse(round(time.time() - start, 2) if success else -1)
Example #2
0
def list(request):
    """Show the document list page and accept new uploads.

    A valid POST stores the uploaded ``docfile`` as a new ``Document`` and
    redirects back to this view.  Any other request (or an invalid POST)
    renders ``engine/list.html`` with every stored document and the form.
    """
    if request.method != 'POST':
        # Plain page view: start with an empty, unbound form.
        form = DocumentForm()
    else:
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            uploaded = Document(docfile=request.FILES['docfile'])
            uploaded.save()

            # Redirect after a successful POST so a reload does not resubmit.
            return HttpResponseRedirect(reverse('engine.views.list'))

    # Template context: all stored documents plus the (possibly bound) form.
    context = {'documents': Document.objects.all(), 'form': form}
    return render_to_response(
        'engine/list.html',
        context,
        context_instance=RequestContext(request)
    )
Example #3
0
def display_cluster_engine(request):
    """Render the cluster engine page, handling an optional file upload.

    On a valid POST the files already present in today's upload directory
    (``<cwd>/static_media_clustapp/documents/YYYY/MM/DD/``) are removed, the
    uploaded ``docfile`` is stored as a new ``Document`` and the directory
    listing is printed for debugging.  The page is then rendered in every
    case with the documents, the form, the algorithms and datasets returned by
    ``database_manager.retrieve_interface_data()`` and the uploaded file
    (an empty string when nothing was uploaded).
    """
    import glob  # local import: only needed for the clean-up below

    algorithms, datasets = database_manager.retrieve_interface_data()
    upload_file_name = ""

    # Handle file upload
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():

            # Today's upload directory, e.g. .../documents/2014/03/07/
            now = datetime.datetime.now()
            path = '/static_media_clustapp/documents/' + now.strftime('%Y/%m/%d') + '/'
            full_path = os.path.realpath('.')
            upload_dir = full_path + path

            # Remove whatever is in there.  Done without a shell so that
            # spaces or metacharacters in the working directory cannot break
            # or alter the command.
            for stale in glob.glob(upload_dir + '*'):
                try:
                    os.remove(stale)
                except OSError:
                    # Best effort, like the `rm` this replaces: entries that
                    # cannot be removed (e.g. sub-directories) are left alone.
                    pass

            # store uploaded file
            newdoc = Document(docfile=request.FILES['docfile'])
            newdoc.save()

            # NOTE(review): this is the uploaded file object, not its name --
            # presumably the template renders it as the file name; confirm.
            upload_file_name = request.FILES['docfile']

            # debugging use, output the newest file uploaded
            print('Newest File Uploaded')
            if os.path.isdir(upload_dir):
                for entry in sorted(os.listdir(upload_dir)):
                    if not entry.startswith('.'):
                        print(entry)

            # No redirect after POST: fall through and render the page with
            # the upload shown.
    else:
        form = DocumentForm()  # An empty, unbound form

    # Load documents for the list page
    documents = Document.objects.all()

    # Render list page with the documents and the form
    return render_to_response(
        'engine/engine_interface.html',
        {
            'documents': documents,
            'form': form,
            'algorithms': algorithms,
            'datasets': datasets,
            'upload': upload_file_name,
        },
        context_instance=RequestContext(request)
    )

    """
Example #4
0
    def _import_document(self, name, base, page_stem, page_count):
        """Create and save one document together with all of its pages.

        Args:
            name: Short document name; also used as ``doc_name`` and as the
                prefix of every ``ElasticPage`` name (``<name>-<page number>``).
            base: File stem shared by the full PDF in storage and by the
                per-page resource files on disk.
            page_stem: Stem of the per-page PDF URLs; the zero-based page
                index and ``.pdf`` are appended to it.
            page_count: Number of pages to load (indices ``0..page_count-1``).

        Raises:
            OSError: if one of the per-page resource files cannot be opened.
        """
        storage = 'https://storage.yandexcloud.net/pdf-storage/'
        # Paths are relative to the current working directory.
        resource_prefix = f'./engine/management/commands/resourses/{base}/{base}'

        document = Document(name=name, url=f'{storage}{base}.pdf')
        pages = []
        elastic_pages = []

        for index in range(page_count):
            page_url = f'{storage}{page_stem}{index}.pdf'

            # Only the first line of each resource file is read; presumably
            # every file holds its whole payload on one line -- confirm.
            with open(f'{resource_prefix}-{index}-text.txt') as f:
                text = f.readline()
            with open(f'{resource_prefix}-{index}-response.json') as f:
                vision = f.readline()

            # Page numbers are 1-based, file and URL indices are 0-based.
            pages.append(
                Page(url=page_url,
                     num=index + 1,
                     text=text,
                     vision=vision,
                     tables=[]))

            elastic_pages.append(
                ElasticPage(name=f'{name}-{index + 1}',
                            url=page_url,
                            num=index + 1,
                            doc_name=name,
                            document=document,
                            text=text))

        # Save the document first, then the pages that reference it.
        document.pages = pages
        document.save()
        for elastic_page in elastic_pages:
            elastic_page.save()

    def handle(self, *args, **options):
        """Load the five bundled sample documents and their pages.

        Each document is built from the per-page text and response files under
        ``./engine/management/commands/resourses/`` and saved, followed by one
        ``ElasticPage`` per page.  Documents are processed in the order listed,
        so a failure leaves the earlier ones saved.
        """
        # (name, base file stem, per-page URL stem, number of pages)
        documents = (
            ('NUP4114',
             'NUP4114',
             'NUP4114_page_',
             7),
            ('Apem_06172019_Q14',
             'Apem_06172019_Q14_Series-1605261',
             'Apem_06172019_Q14_Series-1605261-',
             8),
            ('D_3120-N_ENG',
             'D_3120-N_ENG-1525402',
             'D_3120-N_ENG-1525402-',
             12),
            ('ile-1553267814',
             'ile-1553267814-1588925',
             'ile-1553267814-1588925-',
             2),
            ('steval-spin3204',
             'steval-spin3204-1602123',
             'steval-spin3204-1602123-',
             9),
        )

        for name, base, page_stem, page_count in documents:
            self._import_document(name, base, page_stem, page_count)