Ejemplo n.º 1
0
def ephys_table_identify():
    """Run ephys concept association over data tables not yet processed.

    Selects articles that have at least one data table and a full-text
    record, skipping those whose full-text stat is already flagged with
    ``data_table_ephys_processed=True``. Every data table belonging to the
    remaining articles is passed to ``assocDataTableEphysVal``; afterwards
    the article's full-text stat record (created if missing) is flagged as
    processed and saved.

    Progress is reported through ``prog`` and the total table count is
    printed once before the loop starts. Returns nothing.
    """
    pending_articles = (
        m.Article.objects
        .filter(datatable__isnull=False, articlefulltext__isnull=False)
        .distinct()
        .exclude(articlefulltext__articlefulltextstat__data_table_ephys_processed=True)
    )
    tables = m.DataTable.objects.filter(article__in=pending_articles).distinct()
    total = tables.count()
    # Single parenthesised argument: prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("analyzing %s tables" % total)

    for index, table in enumerate(tables):
        prog(index, total)
        assocDataTableEphysVal(table)

        full_text = table.article.get_full_text()
        if full_text is None:
            # No full text available, so there is no stat record to flag.
            continue

        # The flag is written after each table, so an article owning several
        # tables is re-flagged once per table.
        stat_record = m.ArticleFullTextStat.objects.get_or_create(article_full_text=full_text)[0]
        stat_record.data_table_ephys_processed = True
        stat_record.save()
Ejemplo n.º 2
0
            tableSoup = BeautifulSoup(table)
            table_html = str(tableSoup)
            table_html = add_id_tags_to_table(table_html)
            table_text = tableSoup.get_text()
            table_text = table_text[0 : min(9999, len(table_text))]
            data_table_ob = m.DataTable.objects.get_or_create(article=a, table_html=table_html, table_text=table_text)[
                0
            ]
            data_table_ob = addIdsToTable(data_table_ob)  # add table id elements if there aren't any
            data_table_ob = remove_spurious_table_headers(
                data_table_ob
            )  # takes care of weird header thing for elsevier xml tables
            ds = m.DataSource.objects.get_or_create(data_table=data_table_ob)[0]

            # apply initial text mining of ephys concepts to table
            assocDataTableEphysVal(data_table_ob)

        # text mine article level metadata
        apply_article_metadata(a)

    except Exception, e:
        with open("failed_files.txt", "a") as f:
            f.write("%s\\%s" % (file_name, e))
        print e
        print file_name
    finally:
        f.close()
    #     if html_tables is not None:
    # do a check to see if tables already exist, if do, just return
    #         if a.datatable_set.all().count() > 0:
    #             return a