Esempio n. 1
0
def runTest(doi,transform):
  """Search for one randomly chosen equation of a document and report whether it is found.

  A random (eqnID, source) pair is taken from ``db[doi]['source']``,
  ``transform`` turns the source into a search term, and the local
  ``documents/_external/index`` endpoint is queried with that term.
  The XML reply is then inspected and a one-line verdict is printed.

  Args:
    doi: key of the document in the 'documents' database; it is passed
      through ``decodeDoi`` for comparison with the reply and for most
      of the printed messages.
    transform: callable applied to the equation source to produce the
      search term.

  Returns:
    True if the reply contains an Article or Chapter element with the
    decoded doi that holds an ``equation`` element whose id is the chosen
    eqnID.  False if the reply contains a LatexParseError, TimedOut or
    LimitExceeded element, or if the equation is not among the results.

  Raises:
    Whatever ``transform``, the HTTP request or the XML parser raise;
    nothing is swallowed here.
  """
  db = couchdb_server['documents']
  eqnID, source = rand.choice(db[doi]['source'].items())
  try:
    searchTerm = transform(source)
    url = "http://localhost:%s/documents/_external/index?searchTerm=\"%s\"&searchTimeout=20&limit=2500" % (port, urllib.quote(searchTerm))
    startTime = time.time()
    resultsFile = urllib.urlopen(url)
    endTime = time.time()
    # Always release the HTTP response, even when the reply is not valid XML.
    try:
      results = minidom.parse(resultsFile)
    finally:
      resultsFile.close()

    elapsed = endTime - startTime
    # Loop-invariant: decode once instead of once per result element.
    decodedDoi = decodeDoi(doi)

    # Error markers in the reply, checked in this order; the first hit wins.
    errorMarkers = (
      ("LatexParseError", "Latex parse error"),
      ("TimedOut", "Timed out"),
      ("LimitExceeded", "Limit exceeded"),
    )
    for tagName, label in errorMarkers:
      if results.getElementsByTagName(tagName):
        print("%s on doi: %s and eqnID: %s (%fs)" % (label, decodedDoi, eqnID, elapsed))
        return False

    for result in results.getElementsByTagName("Article") + results.getElementsByTagName("Chapter"):
      # getAttribute yields "" for a missing attribute, so an element
      # without a doi is skipped instead of raising AttributeError.
      if result.getAttribute('doi') != decodedDoi:
        continue
      for eqn in result.getElementsByTagName("equation"):
        if eqn.getAttribute('id') == eqnID:
          print("Passed on doi: %s and eqnID: %s (%fs)" % (decodedDoi, eqnID, elapsed))
          return True

    print("Failed on doi: %s and eqnID: %s (%fs)" % (doi, eqnID, elapsed))
    print(searchTerm)
    return False
  except KeyboardInterrupt:
    # Bare raise keeps the original traceback intact.
    raise
Esempio n. 2
0
def reprocess():
  """Recompute the 'content' field of every document from its 'source' field.

  For each key in the 'documents' database, ``preprocess`` is called on
  every (eqnID, latex) pair of the document's 'source' mapping, the
  resulting list is passed through ``filterNone``, turned into a dict,
  stored under 'content', and the document is written back.
  Progress is printed per document.
  """
  documents = couchdb_server['documents']

  print("Reprocessing latex sources")
  for doi in documents:
    print("Reprocessing %s" % decodeDoi(doi))
    record = documents[doi]
    processed = [preprocess(eqnID, latex) for (eqnID, latex) in record['source'].items()]
    # presumably filterNone drops the entries preprocess rejected — confirm against its definition
    kept = filterNone(processed)
    record['content'] = dict(kept)
    documents[doi] = record
Esempio n. 3
0
def convert_journalID_containerID():
  """Rename the 'journalID' field to 'containerID' in every document.

  Walks all keys of the 'documents' database and prints a progress line
  for each.  A document that has a 'journalID' entry gets its value moved
  to 'containerID' and is saved; documents without the field are left
  untouched and are not written back, which avoids a needless store
  round-trip for unchanged data.
  """
  db = couchdb_server['documents']

  print("Converting")
  for doi in db:
    print("Converting %s" % decodeDoi(doi))
    doc = db[doi]
    if 'journalID' not in doc:
      # Nothing to migrate: skip the write entirely.
      continue
    doc['containerID'] = doc['journalID']
    del doc['journalID']
    db[doi] = doc
Esempio n. 4
0
def check_dates():
  """Compare each document's 'publicationYear' with the year from ``ml_year``.

  For every key in the 'documents' database the stored year is compared
  with ``ml_year(decodeDoi(doi))``:

  * if ``ml_year`` returns something other than "", a mismatch is printed
    and the document is updated and saved, while a match is reported
    as ok;
  * if it returns "", a notice is printed only when the document's
    'format' (default 'article') is 'article', case-insensitively.

  Any exception other than KeyboardInterrupt is reported for that doi and
  the loop moves on to the next document.
  """
  documents = couchdb_server['documents']

  print("Checking dates")
  for doi in documents:
    try:
      record = documents[doi]
      storedYear = record['publicationYear']
      mlYear = ml_year(decodeDoi(doi))

      if not (mlYear != ""):
        # No reference year available; only worth mentioning for articles.
        docFormat = record.get('format', 'article').lower()
        if docFormat == 'article':
          print("ML year not defined for article: %s" % doi)
        continue

      if mlYear != storedYear:
        print("Doi: %s Expected: %s Actual: %s" % (doi, mlYear, storedYear))
        record['publicationYear'] = mlYear
        documents[doi] = record
      else:
        print("Doi: %s ok" % doi)
    except KeyboardInterrupt as interrupt:
      raise interrupt
    except Exception as error:
      print("Failed on doi: %s" % doi)
      print(error)
Esempio n. 5
0
    if results.getElementsByTagName("LimitExceeded"):
      print "Limit exceeded on doi: %s and eqnID: %s (%fs)" % (decodeDoi(doi), eqnID, endTime-startTime)
      return False
    for result in results.getElementsByTagName("Article") + results.getElementsByTagName("Chapter"):
      if result.attributes.get('doi').value == decodeDoi(doi):
        for eqn in result.getElementsByTagName("equation"):
          if eqn.attributes.get('id').value == eqnID:
            print "Passed on doi: %s and eqnID: %s (%fs)" % (decodeDoi(doi), eqnID, endTime-startTime)
            return True
    print "Failed on doi: %s and eqnID: %s (%fs)" % (doi, eqnID, endTime-startTime)
    print searchTerm
    return False
  except KeyboardInterrupt, e:
    raise e
  except Exception, e:
    print "Error on doi: %s and eqnID: %s (%fs)" % (decodeDoi(doi), eqnID, 0)
    print e
    try:
      print "Searchterm: %s" % searchTerm
    except UnicodeEncodeError:
      pass
    return False

def runTests(n,transform):
  db = couchdb_server['documents']
  dois = list(db)
  for i in xrange(0,n):
    doi = None
    source = None
    while not source:
      try: