def init():
    """Create the patent table if it does not exist yet.

    Opens a connection through ``createconnection()``, issues a
    ``create table if not exists`` statement for ``universal.tablename`` and
    commits it.  Any exception is logged and signalled via
    ``universal.logflag``; ``closeconnection()`` runs in every case.
    """
    # One entry per column, in table order.
    column_definitions = (
        "Application_No varchar(50)",
        "Date_of_filing_of_Application DATE",
        "Publication_Date DATE",
        "Name_of_Applicant varchar(1000)",
        "Title_of_Invention varchar(1000)",
        "Name_of_Inventor varchar(1500)",
        "Abstract varchar(3500)",
        "No_of_Pages INT",
        "No_of_Claims INT",
        "International_Classification varchar(100)",
        "Priority_Document_No varchar(70)",
        "Priority_date varchar(100)",
        "Name_of_Priority_country varchar(70)",
        "International_Publication_No varchar(70)",
        "International_Application_No varchar(70)",
        "International_Application_No_filing_date DATE",
        "Patent_of_addition_to_Application_No varchar(70)",
        "Patent_of_addition_to_Application_No_filing_date DATE",
        "Divisional_Application_No varchar(100)",
        "Divisional_Application_No_filing_date DATE",
    )
    try:
        createconnection()
        universal.con.execute(
            "create table if not exists " + universal.tablename + "(" +
            ",".join(column_definitions) + ")")
        universal.con.commit()
    except Exception as error:
        logwriter.logwrite(str(error))
        universal.logflag = 1
    finally:
        closeconnection()
Exemple #2
0
def createconnection():
    """Connect to the MySQL server and select the working database.

    The connection is stored in ``universal.con``.  A ``_mysql.Error`` is
    logged and signalled via ``universal.logflag`` instead of propagating.
    """
    try:
        universal.con = _mysql.connect(universal.host, universal.user,
                                       universal.password)
        universal.con.query("use " + universal.dbname)
    # "except X as e" is valid on Python 2.6+ and Python 3; the old
    # "except X, e" spelling is a SyntaxError on Python 3.
    except _mysql.Error as e:
        logwriter.logwrite(str(e))
        universal.logflag = 1
Exemple #3
0
def loop():
    """Insert the record currently held in ``universal.data`` into MySQL.

    The six date fields are first normalised in place by ``transform()``.
    Every value is then passed through the connection's ``escape_string``
    before being placed in the statement, so quotes or backslashes in the
    extracted text (titles, abstracts, names) can no longer break or alter
    the INSERT.  Any exception is logged and signalled via
    ``universal.logflag``.
    """
    date_tags = (
        "Date of filing of Application",
        "Publication Date",
        "Priority Date",
        "IAFiling Date",
        "IBFiling Date",
        "ICFiling Date",
    )
    # Keys of universal.data in the column order of the target table.
    column_keys = (
        "Application No.",
        "Date of filing of Application",
        "Publication Date",
        "Name of Applicant",
        "Title of the invention",
        "Name of Inventor",
        "Abstract",
        "No. of Pages",
        "No. of Claims",
        "International classification",
        "Priority Document No",
        "Priority Date",
        "Name of priority country",
        "International Publication No",
        "International Application No",
        "IAFiling Date",
        "Patent of Addition to Application Number",
        "IBFiling Date",
        "Divisional to Application Number",
        "ICFiling Date",
    )
    try:
        for date_tag in date_tags:
            transform(date_tag)
        # NOTE(review): assumes escape_string returns str, as MySQL-python
        # does on Python 2; it may return bytes on a Python 3 port -- confirm.
        escape = universal.con.escape_string
        values = [escape(universal.data[key]) for key in column_keys]
        # Single quotes are the standard SQL string delimiter and stay valid
        # when the server runs with ANSI_QUOTES.
        q = ("insert into " + universal.tablename + " values('" +
             "','".join(values) + "')")
        universal.con.query(q)
    except Exception as e:
        logwriter.logwrite(str(e))
        universal.logflag = 1
Exemple #4
0
def init():
    """Create the MySQL database and patent table if they are missing.

    Connects to the server, creates and selects ``universal.dbname``, then
    issues ``create table if not exists`` for ``universal.tablename``.
    Any exception is logged and signalled via ``universal.logflag``;
    ``closeconnection()`` runs in every case.
    """
    # One entry per column, in table order.
    column_definitions = (
        "Application_No varchar(50)",
        "Date_of_filing_of_Application DATE",
        "Publication_Date DATE",
        "Name_of_Applicant varchar(1000)",
        "Title_of_Invention varchar(1000)",
        "Name_of_Inventor varchar(1500)",
        "Abstract varchar(3500)",
        "No_of_Pages varchar(30)",
        "No_of_Claims varchar(30)",
        "International_Classification varchar(50)",
        "Priority_Document_No varchar(50)",
        "Priority_date DATE",
        "Name_of_Priority_country varchar(30)",
        "International_Publication_No varchar(30)",
        "International_Application_No varchar(30)",
        "International_Application_No_filing_date DATE",
        "Patent_of_addition_to_Application_No varchar(30)",
        "Patent_of_addition_to_Application_No_filing_date DATE",
        "Divisional_Application_No varchar(30)",
        "Divisional_Application_No_filing_date DATE",
    )
    try:
        connection = _mysql.connect(universal.host, universal.user,
                                    universal.password)
        universal.con = connection
        connection.query("create database if not exists " + universal.dbname)
        connection.query("use " + universal.dbname)
        connection.query(
            "create table if not exists " + universal.tablename + "(" +
            ",".join(column_definitions) + ")")
    except Exception as error:
        logwriter.logwrite(str(error))
        universal.logflag = 1
    finally:
        closeconnection()
Exemple #5
0
def loop():
    """Insert the record currently held in ``universal.data`` into SQLite.

    The six date fields are normalised in place by ``transform()`` and
    "NA" page/claim counts are replaced by '0'.  The row is inserted with
    ``?`` placeholders so quotes in the extracted text cannot break or
    alter the statement, and the insert is committed.

    On any exception the error is logged with the 1-based page number,
    ``universal.logflag`` is set, and the current worksheet row is given a
    red font.
    """
    date_tags = (
        "Date of filing of Application",
        "Publication Date",
        "Priority Date",
        "IAFiling Date",
        "IBFiling Date",
        "ICFiling Date",
    )
    # Keys of universal.data in the column order of the target table.
    column_keys = (
        "Application No.",
        "Date of filing of Application",
        "Publication Date",
        "Name of Applicant",
        "Title of the invention",
        "Name of Inventor",
        "Abstract",
        "No. of Pages",
        "No. of Claims",
        "International classification",
        "Priority Document No",
        "Priority Date",
        "Name of priority country",
        "International Publication No",
        "International Application No",
        "IAFiling Date",
        "Patent of Addition to Application Number",
        "IBFiling Date",
        "Divisional to Application Number",
        "ICFiling Date",
    )
    try:
        for date_tag in date_tags:
            transform(date_tag)
        for count_tag in ("No. of Pages", "No. of Claims"):
            if universal.data[count_tag] == "NA":
                universal.data[count_tag] = '0'
        # Only the table name is interpolated (identifiers cannot be bound);
        # every value goes through a placeholder.
        placeholders = ",".join("?" * len(column_keys))
        q = ('insert into ' + universal.tablename +
             ' values(' + placeholders + ')')
        universal.con.execute(q, [universal.data[key] for key in column_keys])
        universal.con.commit()
    except Exception as e:
        logwriter.logwrite("MySQL: " + str(e) + "  on page " +
                           str(int(universal.filename) + 1))
        universal.logflag = 1
        # Highlight the spreadsheet row whose database insert failed.
        Format = universal.workbook.add_format()
        Format.set_font_color('red')
        universal.worksheet.set_row(universal.row, None, Format)
Exemple #6
0
def initial():
    """Run the extraction pipeline for the PDF named by ``universal.filename``.

    Creates the working folders, bursts the PDF into pages, then converts
    and parses each page.  Every page for which ``parser.begin()`` does not
    return -1 is written to the spreadsheet and to MySQL.
    ``universal.flag`` is set to 1 once the first scan loop has ended.
    Returns 0 when the PDF has no pages, otherwise ``None``.
    """
    run_command("mkdir " + universal.pdf_folder)
    run_command("mkdir " + universal.tag_folder)
    source_name = universal.filename  # universal.filename is overwritten per page below
    page_count = burstpdf()
    banner = "********" + "\n" + source_name + "\n*************\n"
    logwriter.logwrite("\n" + banner)
    if page_count == 0:
        logwriter.logwrite("No pages in this pdf\n")
        logwriter.logwrite(banner)
        return 0

    def page_has_patent(index):
        # Convert one page and report whether the parser accepted it.
        universal.filename = str(index)
        convert.convert()
        return parser.begin() != -1

    page = 0
    excelwriter.init()
    # Phase 1: scan forward until the first page that parses successfully.
    while page < page_count:
        accepted = page_has_patent(page)
        page += 1
        if accepted:
            excelwriter.loop()
            mysql.loop()
            break
    universal.flag = 1  # Process of extraction will start
    print(universal.con)
    # Phase 2: handle every remaining page.
    while page < page_count:
        if page_has_patent(page):
            excelwriter.loop()
            mysql.loop()
        page += 1
    universal.workbook.close()
    run_command("rm -r " + universal.pdf_folder)
    run_command("rm -r " + universal.tag_folder)
    logwriter.logwrite(banner)
Exemple #7
0
def transform(tag):
    """Rewrite ``universal.data[tag]`` from ``dd/mm/YYYY`` to ``Y/M/D``.

    "NA" and values that do not match ``dd/mm/YYYY`` are replaced by the
    placeholder date 1 January of year 1; the latter case is also logged
    with the 1-based page number.  The result is written back without zero
    padding, e.g. "25/12/2015" -> "2015/12/25" and "NA" -> "1/1/1".
    """
    raw = str(universal.data[tag])
    placeholder = datetime.datetime(1, 1, 1)
    if raw == "NA":
        parsed = placeholder
    else:
        try:
            parsed = datetime.datetime.strptime(raw, "%d/%m/%Y")
        except ValueError:
            # strptime raises ValueError for a malformed string; a bare
            # except would also swallow KeyboardInterrupt and SystemExit.
            parsed = placeholder
            # Log the stringified value so a non-string entry cannot raise
            # TypeError inside the handler.
            logwriter.logwrite("MYSQL: " + tag + "-" + raw +
                               " is not in format on page " +
                               str(int(universal.filename) + 1))
    universal.data[tag] = (str(parsed.year) + "/" + str(parsed.month) + "/" +
                           str(parsed.day))
Exemple #8
0
def begin():
    """Collect the text of the current page and run the extractor on it.

    Returns the result of ``extractor.getdetails`` for the page text, or -1
    when the extractor raises; in that case the error is logged with the
    1-based page number and ``universal.logflag`` is set.
    """
    universal.datastring = ""
    # html-tag filename converted from pdf
    # NOTE(review): reopen() presumably loads the file into universal.tree -- confirm.
    reopen(universal.filename + universal.filename + ".html")
    # Join once instead of growing the string fragment by fragment.
    universal.datastring = "".join(universal.tree.itertext())
    try:
        return extractor.getdetails(universal.datastring)
    except Exception as e:
        logwriter.logwrite("Extracter: " + str(e) + " on page " +
                           str(int(universal.filename) + 1))
        universal.logflag = 1
        return -1
Exemple #9
0
def getdetails(new_patent):
    """Locate every tag of a patent page and extract its value.

    ``new_patent`` is the page text; consecutive words must be separated
    by whitespace.  Parentheses adjacent to a digit are padded with a space
    (so "(21)" becomes " (21) "); all other parentheses are dropped.

    The module-level ``data``, ``flag`` and ``indexvalues`` are reset and
    then filled: a tag that cannot be located gets ``data[tag] = "NA"``,
    every located tag is appended to ``indexvalues``, and
    ``extractvalues`` is finally called on the split text.

    Returns 1 when ``check`` accepts the page as a patent, otherwise -1.
    """
    data.clear()
    flag.clear()
    del indexvalues[:]

    # Normalise parentheses.  The bounds checks stop a trailing "(" from
    # raising IndexError and a leading ")" from wrapping to the last char.
    length = len(new_patent)
    pieces = []
    for j, ch in enumerate(new_patent):
        if ch == '(':
            if j + 1 < length and new_patent[j + 1].isdigit():
                pieces.append(" (")
        elif ch == ')':
            if j > 0 and new_patent[j - 1].isdigit():
                pieces.append(") ")
        else:
            pieces.append(ch)
    temp_patent = "".join(pieces)  # original-case text
    words = temp_patent.lower().split()  # lower-case words used for searching

    if not (check(words) == True):  # check() decides whether the page is a patent
        return -1

    for tag in Tags:
        flag[tag] = 0  # flag[tag] == 1 means the tag has been dealt with

    # Page and claim counts are located separately; their flags are already
    # set to 1 inside getnoofpagesandclaims (per the original author's note).
    pages, claims = getnoofpagesandclaims(temp_patent)
    if pages.start != -1 and pages.end != -1:
        indexvalues.append(Tag(pages_tag, pages.start, pages.end))
    if claims.start != -1 and claims.end != -1:
        indexvalues.append(Tag(claims_tag, claims.start, claims.end))

    for tag in Tags:
        if flag[tag] != 1:
            span = searchtag(words, tag)  # Pair(start, end) of the tag
            if span == Pair(-1, -1):
                # Exact search failed: fall back to locate() with a 0.85
                # threshold and search again for what it returned.
                (a, b) = locate(tag, words, 0.85)
                logwriter.logwrite(tag + " locate function")
                if a != -1:
                    span = searchtag(words, b)
                if a == -1 or span == Pair(-1, -1):
                    data[tag] = "NA"
                    logwriter.logwrite(tag + " not found")
                    flag[tag] = 1
            if flag[tag] == 0:
                indexvalues.append(Tag(tag, span.start, span.end))

    extractvalues(temp_patent.split())
    return 1
Exemple #10
0
def begin():
    """Collect the text of the current page and run the extractor on it.

    Returns the result of ``extractor.getdetails`` for the page text, or -1
    when the extractor raises (the error is logged).
    """
    universal.datastring = ""
    # html-tag filename converted from pdf
    # NOTE(review): reopen() presumably loads the file into universal.tree -- confirm.
    reopen(universal.filename + universal.filename + ".html")
    fragments = universal.tree.itertext()
    logwriter.logwrite("***************" + universal.filename +
                       "*************")
    # Join once instead of growing the string fragment by fragment.
    universal.datastring = "".join(fragments)
    try:
        return extractor.getdetails(universal.datastring)
    except Exception as e:
        # Every other logwrite call site passes a string; pass the message
        # rather than the exception object.
        logwriter.logwrite(str(e))
        # TODO: run a fallback extraction when getdetails fails.
        return -1
Exemple #11
0
def initial():
    """Run the extraction pipeline for the PDF named by ``universal.filename``.

    Recreates the working folders, bursts the PDF into pages, then converts
    and parses each page.  Every page for which ``Parser.begin()`` does not
    return -1 is written to the spreadsheet and to SQLite.  The working
    folders are removed at the end.
    Returns 0 when the PDF has no pages, otherwise ``None``.
    """
    # Start from fresh, empty working folders.  Each folder is handled on
    # its own so a leftover from an earlier run is wiped even when the
    # other folder does not exist (rmtree on a missing path would raise).
    for folder in (universal.pdf_folder, universal.tag_folder):
        shutil.rmtree(folder, ignore_errors=True)
        os.mkdir(folder)
    temp = universal.filename  # universal.filename is overwritten per page below
    no_of_pages = burstpdf()
    logwriter.logwrite("\n********" + "\n" + str(temp) + "\n*************\n")
    if no_of_pages == 0:
        logwriter.logwrite("No pages in this pdf\n")
        logwriter.logwrite("********" + "\n" + str(temp) + "\n*************\n")
        return 0
    i = 0
    excelwriter.init()
    # Phase 1: scan forward until the first page that parses successfully.
    while i < no_of_pages:
        universal.filename = str(i)
        convert.convert()
        i += 1
        if Parser.begin() != -1:
            excelwriter.loop()
            sqlitewriter.loop()
            break
    universal.flag = 1  # Process of extraction will start
    # Phase 2: handle every remaining page.
    while i < no_of_pages:
        universal.filename = str(i)
        convert.convert()
        if Parser.begin() != -1:
            excelwriter.loop()
            sqlitewriter.loop()
        i += 1
    universal.workbook.close()
    shutil.rmtree(universal.pdf_folder)
    shutil.rmtree(universal.tag_folder)
    logwriter.logwrite("********" + "\n" + str(temp) + "\n*************\n")
Exemple #12
0
def createconnection():
    """Open the SQLite database ``<universal.dbname>.db``.

    The connection is stored in ``universal.con``.  On failure the error is
    logged and ``universal.logflag`` is set; ``universal.con`` is left
    untouched.
    """
    try:
        connection = sqlite3.connect(universal.dbname + ".db")
    except Exception as error:
        logwriter.logwrite(str(error))
        universal.logflag = 1
    else:
        universal.con = connection
Exemple #13
0
def closeconnection():
    """Close ``universal.con``.

    Any exception is logged and signalled via ``universal.logflag``
    instead of propagating.
    """
    try:
        connection = universal.con
        connection.close()
    except Exception as error:
        logwriter.logwrite(str(error))
        universal.logflag = 1
Exemple #14
0
def loop():
    """Write the record in ``universal.data`` to row ``universal.row``.

    Columns 0-19 are filled in a fixed order.  The two mandatory dates are
    always written with ``universal.date_format``; the optional dates use
    that format unless their value is "NA"; page and claim counts are
    written as integers unless they are "NA" (compared case-insensitively).
    ``universal.row`` is advanced by one on success.  Any exception is
    logged together with ``universal.filename``.
    """
    text, date, optional_date, count = "text", "date", "optional_date", "count"
    # (key in universal.data, how to write it); list position == column index.
    layout = (
        ("Application No.", text),
        ("Date of filing of Application", date),
        ("Publication Date", date),
        ("Name of Applicant", text),
        ("Title of the invention", text),
        ("Name of Inventor", text),
        ("Abstract", text),
        ("No. of Pages", count),
        ("No. of Claims", count),
        ("International classification", text),
        ("Priority Document No", text),
        ("Priority Date", optional_date),
        ("Name of priority country", text),
        ("International Application No", text),
        ("IAFiling Date", optional_date),
        ("International Publication No", text),
        ("Patent of Addition to Application Number", text),
        ("IBFiling Date", optional_date),
        ("Divisional to Application Number", text),
        ("ICFiling Date", optional_date),
    )
    try:
        for column, (key, kind) in enumerate(layout):
            if kind == count:
                shown = universal.data[key].upper()
                if shown != "NA":
                    universal.worksheet.write(universal.row, column,
                                              int(universal.data[key]))
                else:
                    universal.worksheet.write(universal.row, column, shown)
            elif kind == date or (kind == optional_date
                                  and universal.data[key] != "NA"):
                universal.worksheet.write(universal.row, column,
                                          universal.data[key],
                                          universal.date_format)
            else:
                # Plain text, or an optional date whose value is "NA".
                universal.worksheet.write(universal.row, column,
                                          universal.data[key])
        universal.row += 1
    except Exception as error:
        logwriter.logwrite("Excelfile : " + str(error) + " on page " +
                           universal.filename)
Exemple #15
0
def closeconnection():
    """Close the MySQL connection held in ``universal.con``.

    A ``_mysql.Error`` is logged and signalled via ``universal.logflag``
    instead of propagating.
    """
    try:
        universal.con.close()
    # "except X as e" is valid on Python 2.6+ and Python 3; the old
    # "except X, e" spelling is a SyntaxError on Python 3.
    except _mysql.Error as e:
        logwriter.logwrite(str(e))
        universal.logflag = 1