Ejemplo n.º 1
0

from montysolr import config, optionparse
import sys 

import gdata.spreadsheet.service

log = config.get_logger('montysolr.gd_add_row')



def run(user=None, password=None, spreadsheet=None, keys=[], data=[], verbose=None, sep=None):
    '''
    Utility to add values into a Google SpreadSheet

    usage: %prog -u montysolr -p <password> -s SpreadSheetName -k date,docs -d 12/13/14,5000
    -p, --password = PASS: password to access the Google Data
    -u, --user = USER: user name
    -s, --spreadsheet = FILE: spread sheet name (you must have created the spreadsheet manually!)
    -k, --keys = KEYS: comma separated names from the header in the table (you must have created it manually!)
    -d, --data = DATA: comma separated list of values to insert, each row is then split using row-separator
    -e, --sep = SEP: character to use as a value separator for the data fields, default [|]
    -v, --verbose = VERBOSE: numeric value of the logging module [30]
    '''

    log.setLevel(int(verbose))
    
    vals = []
    for v in data:
        vv = v.split(sep)
        if len(vv) != len(keys):
Ejemplo n.º 2
0
import sys
import time
import logging

import urllib2 as u2
import httplib
import simplejson
import urllib

from montysolr import config

log = config.get_logger("montysolr.examples.adsabs.run_dump")


def req(url, **kwargs):
    '''
    Send a request to ``url`` and return the decoded JSON response.

    The keyword arguments are url-encoded and handed to ``urllib.urlopen``
    as the request data. ``wt`` is always forced to ``json`` (overriding any
    ``wt`` supplied by the caller) -- presumably so the server answers in
    JSON, which is what the body is parsed as.

    Returns whatever ``simplejson.loads`` produces for the response body.

    Any exception is logged, followed by the raw body read so far (empty
    when the failure happened before the body was read), and is then
    re-raised to the caller.
    '''
    kwargs['wt'] = 'json'
    params = urllib.urlencode(kwargs)
    page = ''
    try:
        conn = urllib.urlopen(url, params)
        try:
            page = conn.read()
        finally:
            # Release the connection even when reading the body fails.
            conn.close()
        return simplejson.loads(page)
    except Exception as e:
        log.error(str(e))
        log.error(page)
        # A bare raise keeps the original traceback; ``raise e`` would
        # restart it from this line on Python 2.
        raise


def run_dump(solr_url,
Ejemplo n.º 3
0
import sys
import time
import logging

import urllib2 as u2
import httplib
import simplejson
import urllib

from montysolr import config

log = config.get_logger("montysolr.examples.adsabs.reindex")


def req(url, **kwargs):
    """
    Send a request to ``url`` and return the decoded JSON response.

    The keyword arguments are url-encoded and handed to ``urllib.urlopen``
    as the request data. ``wt`` is always forced to ``json`` (overriding any
    ``wt`` supplied by the caller) -- presumably so the server answers in
    JSON, which is what the body is parsed as.

    Returns whatever ``simplejson.loads`` produces for the response body.

    Any exception is logged, followed by the raw body read so far (empty
    when the failure happened before the body was read), and is then
    re-raised to the caller.
    """
    kwargs["wt"] = "json"
    params = urllib.urlencode(kwargs)
    page = ""
    try:
        conn = urllib.urlopen(url, params)
        try:
            page = conn.read()
        finally:
            # Release the connection even when reading the body fails.
            conn.close()
        return simplejson.loads(page)
    except Exception as e:
        log.error(str(e))
        log.error(page)
        # A bare raise keeps the original traceback; ``raise e`` would
        # restart it from this line on Python 2.
        raise


def run_dump(solr_url, wait=10, max_wait=3600):
Ejemplo n.º 4
0

from montysolr import config, optionparse
from lxml import etree

import sys 


log = config.get_logger('montysolr.extract_values')



def run(xpath, input=None, sep=None, verbose=None):
    '''
    Utility to extract text values from the xml

    usage: %prog [options]
    -i, --input = I: file input
    -e, --sep = SEP: character to use as a value separator for the data fields, default [|]
    -v, --verbose = VERBOSE: numeric value of the logging module [30]
    '''

    log.setLevel(int(verbose))
    
    
    root = etree.parse(input)
    tree = root.getroot()
    
    for xp in xpath:
        
        xp = xp.replace('\\n', '\n').replace('\\t', '\t')
Ejemplo n.º 5
0
from montysolr import config, optionparse
from lxml import etree

import sys

log = config.get_logger('montysolr.extract_values')


def run(xpath, input=None, sep=None, verbose=None):
    '''
    Utility to extract text values from the xml

    usage: %prog [options]
    -i, --input = I: file input
    -e, --sep = SEP: character to use as a value separator for the data fields, default [|]
    -v, --verbose = VERBOSE: numeric value of the logging module [30]
    '''

    log.setLevel(int(verbose))

    root = etree.parse(input)
    tree = root.getroot()

    for xp in xpath:

        xp = xp.replace('\\n', '\n').replace('\\t', '\t')
        log.info(xp)

        elems = tree.xpath(xp)
        if len(elems) < 1:
            log.error("Nothing found in %s for xpath %s" % (input, xp))
Ejemplo n.º 6
0
import sys
import time
import logging

import urllib2 as u2
import httplib
import simplejson
import urllib

from montysolr import config

log = config.get_logger("montysolr.examples.adsabs.run_dump")


def req(url, **kwargs):
    """
    Send a request to ``url`` and return the decoded JSON response.

    The keyword arguments are url-encoded and handed to ``urllib.urlopen``
    as the request data. ``wt`` is always forced to ``json`` (overriding any
    ``wt`` supplied by the caller) -- presumably so the server answers in
    JSON, which is what the body is parsed as.

    Returns whatever ``simplejson.loads`` produces for the response body.

    Any exception is logged, followed by the raw body read so far (empty
    when the failure happened before the body was read), and is then
    re-raised to the caller.
    """
    kwargs["wt"] = "json"
    params = urllib.urlencode(kwargs)
    page = ""
    try:
        conn = urllib.urlopen(url, params)
        try:
            page = conn.read()
        finally:
            # Release the connection even when reading the body fails.
            conn.close()
        return simplejson.loads(page)
    except Exception as e:
        log.error(str(e))
        log.error(page)
        # A bare raise keeps the original traceback; ``raise e`` would
        # restart it from this line on Python 2.
        raise


def run_dump(solr_url, source_field="author", target_field="author_collector", max_wait=3600):
Ejemplo n.º 7
0
import sys
import os

from montysolr import config 
from monty_examples.utils import req


log = config.get_logger("montysolr.examples.measure_qtime")

def run(solr_url, query, repetitions=1):
    
    repetitions = int(repetitions)
    
    if (os.path.exists(query)):
        queries = load_queries(query)
        log.info("Loaded %s queries from: %s" % (len(queries), query))
    else:
        queries = [query]
    
    results = {}
    
    for i in (range(repetitions)):
        log.info("Starting iteration: #%s" % i)
        for q in queries:
            log.info("%s" % q)
            rsp = req(solr_url, q=q, rows=0)
            
            if (not rsp['responseHeader'].has_key('status') or rsp['responseHeader']['status'] != 0):
                log.error("Error searching: %s" % str(rsp))
                continue
Ejemplo n.º 8
0
from montysolr import config, optionparse
import sys

import gdata.spreadsheet.service

log = config.get_logger('montysolr.gd_add_row')


def run(user=None,
        password=None,
        spreadsheet=None,
        keys=[],
        data=[],
        verbose=None,
        sep=None):
    '''
    Utility to add values into a Google SpreadSheet

    usage: %prog -u montysolr -p <password> -s SpreadSheetName -k date,docs -d 12/13/14,5000
    -p, --password = PASS: password to access the Google Data
    -u, --user = USER: user name
    -s, --spreadsheet = FILE: spread sheet name (you must have created the spreadsheet manually!)
    -k, --keys = KEYS: comma separated names from the header in the table (you must have created it manually!)
    -d, --data = DATA: comma separated list of values to insert, each row is then split using row-separator
    -e, --sep = SEP: character to use as a value separator for the data fields, default [|]
    -v, --verbose = VERBOSE: numeric value of the logging module [30]
    '''

    log.setLevel(int(verbose))

    vals = []
Ejemplo n.º 9
0
import sys
import time

import pprint

from montysolr import config 
from monty_examples.utils import req

log = config.get_logger("montysolr.examples.adsabs.recreate_index")

    

def recreate_index(solr_url, 
                   max_time=3600,
                   delay=5,
                   handler_name='/invenio/update',
                   maximport=500,
                   batchsize=2000,
                   startfrom=-1,
                   inveniourl='python://search',
                   importurl='/invenio/import?command=full-import&amp;dirs=',
                   updateurl='/invenio/import?command=full-import&amp;dirs=',
                   deleteurl='blankrecords',
                   doctor_handler='/invenio-doctor'
                   ):
    
    up_url = solr_url + handler_name
    doctor_url = solr_url + doctor_handler
    
    delay = int(delay)
Ejemplo n.º 10
0
from montysolr import config, optionparse
import sys

import gdata.spreadsheet.service

log = config.get_logger("montysolr.gd_add_row")


def run(user=None, password=None, spreadsheet=None, keys=[], data=[], verbose=None, sep=None):
    """
    Utility to add values into a Google SpreadSheet

    usage: %prog -u montysolr -p <password> -s SpreadSheetName -k date,docs -d 12/13/14,5000
    -p, --password = PASS: password to access the Google Data
    -u, --user = USER: user name
    -s, --spreadsheet = FILE: spread sheet name (you must have created the spreadsheet manually!)
    -k, --keys = KEYS: comma separated names from the header in the table (you must have created it manually!)
    -d, --data = DATA: comma separated list of values to insert, each row is then split using row-separator
    -e, --sep = SEP: character to use as a value separator for the data fields, default [|]
    -v, --verbose = VERBOSE: numeric value of the logging module [30]
    """

    log.setLevel(int(verbose))

    vals = []
    for v in data:
        vv = v.split(sep)
        if len(vv) != len(keys):
            log.error("The data is not of the same size as header!")
            log.error("header=%s, data=%s" % (keys, v))
            raise (Exception("Wrong input"))