# -*- coding: utf-8 -*-
"""
Created on Mon Apr 25 15:28:21 2016

@author: Matteus

Match the rows of a paper against the journal names stored in ``bajs.db``.

The script builds a ``refsecSearch`` object for a PDF, calls
``findrefSection()`` on it, and then, for every row index inside each range
listed in ``dref.clust``, queries the ``JournalNames`` table for records whose
``Jname`` column occurs as a substring of that row. The per-row query results
are collected in the module-level list ``Jname``.
"""
from trainPlay import refsecSearch
import sqlite3

path = 'http://arxiv.org/pdf/1501.02262v1.pdf'
dref = refsecSearch(path)
dref.findrefSection()

# One entry per inspected row: the list returned by fetchall() for that row
# (an empty list when no journal name matched).
Jname = []

conn = sqlite3.connect('bajs.db')
try:
    c = conn.cursor()
    try:
        for clusts in dref.clust:
            # clusts[0] and clusts[1] are used as the start (inclusive) and
            # end (exclusive) row indices of the range.
            for itr in range(clusts[0], clusts[1]):
                # The parameters argument must be a sequence with exactly one
                # item per "?" placeholder. A bare string would be treated as
                # a sequence of single characters and rejected with
                # "Incorrect number of bindings supplied", hence the
                # one-element tuple.
                c.execute(
                    "SELECT * FROM JournalNames WHERE instr(?,Jname) > 0;",
                    (dref.rows[itr],))
                Jname.append(c.fetchall())
    finally:
        # Release the cursor even if a query fails part-way through.
        c.close()
finally:
    # Always close the database connection, including on errors.
    conn.close()
from trainPlay import refsecSearch import os #str_inp = True ref_dir = os.path.join('C:\\','Users','Matteus','various papers') labeled_data ={"article":[], "ref_sec":[]} str_inp = ['dftb3.pdf','ZernerPertubation1.pdf','ZernerPertubation2.pdf'] outfile = os.path.join(ref_dir,'labeled_data') all_labels = [] for istr in str_inp: # while str_inp: # str_inp = raw_input('input dir for paper: ') ref_section = [] dref = refsecSearch(os.path.join(ref_dir,istr)) nrows = len(dref.rows) print 'There are a total of '+str(nrows)+' rows' dref.findrefSection() print 'the clusters found are the following' print dref.clust while True: start_val = int(raw_input('from what row do you want to start? ')) end_val = int(raw_input('until what row? ')) if start_val != 0: try: print dref.rows[start_val-1] except: print dref.rows[start_val-1].encode('utf-8') print '_____________________________________' for row in dref.rows[start_val:end_val]: try: print row