Example #1
0
class OperateCsv(object):
    """Keyword library for reading columns out of csv files.

    A ``CsvHandler`` is registered in ``self.store`` for the duration of each
    read and unregistered again afterwards.
    """
    def __init__(self):
        self.store = Store()

    def read_csv_columns(self, source_file, *cared_title, **kwargs):
        """Read one or more columns of a csv file, selected by column title.

        +--------------------+------+------------------------------------+
        | Input Parameters   | Man. | Description                        |
        +--------------------+------+------------------------------------+
        | source_file        | yes  | Path of csv file                   |
        +--------------------+------+------------------------------------+
        | cared_title        | yes  | one or more cared column's title   |
        +--------------------+------+------------------------------------+
        | alias (keyword)    | no   | name used to register the handler  |
        +--------------------+------+------------------------------------+

        Return: [colA_list] or [[colA_list], [colB_list]]

        Example:

        +------------------+--------------+-------------+------------+
        | read_csv_columns | /opt/xxx.csv | Time        |            |
        +------------------+--------------+-------------+------------+
        | read_csv_columns | /opt/xxx.csv | Time        | SFN        |
        +------------------+--------------+-------------+------------+

        Raises ``IndexError`` when no column title is given, because the
        first title is needed to build the handler.
        """
        alias = kwargs.get('alias')
        csv_obj = CsvHandler(source_file, cared_title[0])
        self.store.add(csv_obj, alias=alias)
        try:
            return csv_obj.get_csv_columns_list(*cared_title)
        finally:
            # Unregister the handler even when the read fails, so a failed
            # call does not leave a stale entry behind in the store.
            self.store.remove(alias=alias)
Example #2
0
    def all(self, data):
        """Collect every solution reachable from ``data`` and return them.

        The grid is walked cell by cell, left to right and top to bottom.
        Cells holding ``0`` are handed to ``self.attempt``; every time the
        bottom-right cell is reached the current grid is recorded and
        ``self.attempt`` is called again to continue the search.  The walk
        only stops through an exception: one whose message is
        ``'There is no solution!'`` ends the search normally, anything else
        is re-raised.

        :param data: two-dimensional grid indexed as ``data[row][col]``
        :return: whatever ``Store.all()`` yields for the recorded grids
        """
        found = Store()
        tracker = Tracker()
        last = self.size - 1
        pos = (0, 0)

        try:
            while True:
                # Empty cell: let attempt() fill it (or move elsewhere).
                if data[pos[0]][pos[1]] == 0:
                    pos, data = self.attempt(pos, data, tracker)

                # Reached the final cell: record the grid and keep searching.
                if pos[0] == last and pos[1] == last:
                    found.add(data)
                    pos, data = self.attempt(pos, data, tracker)

                # Advance to the next cell, wrapping to the next row.
                at_row_end = pos[1] == last
                pos = (pos[0] + 1, 0) if at_row_end else (pos[0], pos[1] + 1)
        except Exception as err:
            # This exact message is the loop's termination signal.
            if str(err) != 'There is no solution!':
                raise err

        return found.all()
Example #3
0
class OperateTxt(object):
    """Keywords to set up, tear down, edit and query nodes of a txt file.

    Every keyword accepts an optional ``alias`` keyword argument that selects
    the handler held in ``self.store``.
    """
    def __init__(self):
        self.store = Store()

    def setup_txt(self, src_file, save_path=None, **kwargs):
        """Prepare a txt file for editing and register its handler.

        :param string src_file: the source txt file
        :param string save_path: file name to save to; the source file name
            is used when this is empty or None
        """
        handler = TxtHandler()
        handler.setup(src_file, save_path or src_file)
        self.store.add(handler, alias=kwargs.get('alias'))

    def teardown_txt(self, **kwargs):
        """Tear down the handler selected by ``alias`` and unregister it."""
        alias = kwargs.get('alias')
        handler = self.store.get(alias)
        handler.teardown()
        self.store.remove(alias=alias)

    def modify_txt_node(self, *args, **kwargs):
        """Modify one or more txt nodes.

        :param string args: such as 0x10042:0x0A691892  0x10043:51015
        """
        self.store.get(kwargs.get('alias')).modify_node(*args)

    def add_txt_node(self, *args, **kwargs):
        """Add one or more txt nodes.

        :param string args: such as '0x10301:1#MAC L2' or '0x10302:2'
        """
        self.store.get(kwargs.get('alias')).add_node(*args)

    def delete_txt_node(self, *args, **kwargs):
        """Delete one or more txt nodes.

        :param string args: such as '0x10049'
        """
        self.store.get(kwargs.get('alias')).delete_node(*args)

    def read_txt_node(self, *args, **kwargs):
        """Return the text value of a txt node.

        :param string args: such as '0x10040'
        """
        handler = self.store.get(kwargs.get('alias'))
        return handler.get_node_text(*args)
Example #4
0
def go_store():
    """Exercise a ``Store`` end to end and print what it holds.

    Removes from the empty store, adds a batch of integers, removes one of
    them, round-trips the store through ``save()``/``load()`` and finally
    prints the results of ``grep``, ``find`` and a full iteration.
    """
    store = Store()

    store.remove(None)
    store.add(323, 2, 10, 88, 78, 415, 89, 189)
    store.remove(78)

    store.save()
    store.load()

    # Parenthesised single-argument print behaves identically as a Python 2
    # statement and as a Python 3 function call.  The raw string keeps the
    # regex (text ending in an even digit) free of invalid escape sequences.
    print(store.grep(r'\d*[02468]$'))
    print(store.find(89, -1, 415))

    for item in store.items():
        print(item)
Example #5
0
class UnzipFile(object):
    """Keyword library for uncompressing archive files."""
    def __init__(self):
        self.store = Store()
        self._log = logging.getLogger(__name__)
        self._log.setLevel(logging.DEBUG)

    def unzip_file(self, source_file, to_path=None, **kwargs):
        """Uncompress an archive into a directory named after the archive.

        Supported: '.gz','.tar','.tar.bz2','.tar.gz','.tgz','.tz2','.docx',
        '.egg','.jar','.odg','.odp','.ods','.xlsx','.odt','.pptx','.zip'

        +------------------+------+--------------------------------+
        | Input Parameters | Man. | Description                    |
        +------------------+------+--------------------------------+
        | source_file      | yes  | Path of compressed file        |
        +------------------+------+--------------------------------+
        | to_path          | no   | Path to save uncompressed file |
        +------------------+------+--------------------------------+

        Example:

        +------------------+------------------------------------------------+----------------------------------+
        | unzip_file       | /home/ute/ta_kiss_files/example/snapshot.zip   | /home/ute/ta_kiss_files/example  |
        +------------------+------------------------------------------------+----------------------------------+

        The uncompressed files are saved to
        /home/ute/ta_kiss_files/example/snapshot/...

        An existing target directory is deleted before extraction.
        """
        # Strip the archive extension; two-part suffixes need two passes.
        source_file_name = os.path.splitext(source_file)[0]
        if source_file.endswith(('.tar.gz', '.tar.bz2')):
            source_file_name = os.path.splitext(source_file_name)[0]

        if to_path is None:
            to_path = source_file_name
        else:
            to_path = os.path.join(to_path, os.path.basename(source_file_name))

        # Start from a clean target directory.
        if os.path.isdir(to_path):
            shutil.rmtree(to_path)

        alias = kwargs.get('alias')
        unzip_handler = ArchiveFile()
        self.store.add(unzip_handler, alias=alias)
        try:
            unzip_handler.deep_unzip_file(source_file, to_path)
        finally:
            # Always unregister, so a failed extraction does not leave the
            # handler behind in the store.
            self.store.remove(alias=alias)
class Encryptor(object):
    """Encrypt messages with one-off Fernet keys kept in a ``Store``.

    Each key is stored under the SHA-224 hash of the ciphertext it produced,
    so a ciphertext can later be matched to its key.
    """
    def __init__(self):
        # Store credentials come from the environment (None when unset).
        self.dbUser = os.environ.get('API_USER')
        self.dbPw = os.environ.get('API_PASSWORD')
        self.st = Store(self.dbUser, self.dbPw)

    def encrypt(self, message):
        """Encrypt ``message`` (a str) and persist the key that was used.

        :return: the Fernet token as returned by ``Fernet.encrypt``
        """
        key = Fernet.generate_key()
        encrypted = Fernet(key).encrypt(message.encode())
        doc = {
            "hsh": self.generate_hash(encrypted),
            "key": key.decode(),
            "destroy": False,
            "date": datetime.datetime.utcnow()
        }
        self.st.add(doc)
        return encrypted

    def decrypt(self, encryptedMessage, destroy=False):
        """Decrypt a token produced by :meth:`encrypt`.

        :param encryptedMessage: the encrypted token (bytes)
        :param destroy: look the key up with ``desFind`` instead of ``find``
        :return: the decrypted text, or ``-1`` when no key record is found
        """
        query = {'hsh': self.generate_hash(encryptedMessage)}
        record = self.st.desFind(query) if destroy else self.st.find(query)
        if record is None:
            return -1
        key = record['key'].encode()
        return Fernet(key).decrypt(encryptedMessage).decode()

    def destroy(self, encryptedMessage):
        """Run ``desFind`` for the token's key record.

        :return: True when a record was found, False otherwise
        """
        hsh = self.generate_hash(encryptedMessage)
        fnd = self.st.desFind({'hsh': hsh})
        # decrypt() treats None as "not found"; treat it the same way here
        # instead of reporting success for a missing record.
        return fnd is not None and fnd != {}

    def generate_hash(self, encrypted):
        """Return the hexadecimal SHA-224 digest of ``encrypted`` (bytes)."""
        return hashlib.sha224(encrypted).hexdigest()
Example #7
0
class DDSketch(object):
    """Quantile sketch that buckets values on a logarithmic scale.

    A value is mapped to an integer key derived from ``log(value)`` divided
    by ``log(gamma)``, where ``gamma = 1 + 2 * alpha / (1 - alpha)``.  The
    keys are counted in a ``Store``; count, sum, min and max are tracked
    exactly alongside it.
    """
    def __init__(self, alpha=None, bin_limit=None, min_value=None):
        """Create an empty sketch.

        Missing or out-of-range arguments fall back to the module defaults.

        :param alpha: accuracy parameter, must lie strictly between 0 and 1
        :param bin_limit: non-negative limit passed on to the ``Store``
        :param min_value: positive magnitude below which values get key 0
        """
        if alpha is None or alpha <= 0 or alpha >= 1:
            alpha = DEFAULT_ALPHA
        if bin_limit is None or bin_limit < 0:
            bin_limit = DEFAULT_BIN_LIMIT
        # None cannot be compared with 0 on Python 3, and 0 would make
        # math.log() below fail, so both fall back to the default.
        if min_value is None or min_value <= 0:
            min_value = DEFAULT_MIN_VALUE

        self.gamma = 1 + 2 * alpha / (1 - alpha)
        self.gamma_ln = math.log1p(2 * alpha / (1 - alpha))
        self.min_value = min_value
        self.offset = -int(math.ceil(math.log(min_value) / self.gamma_ln)) + 1

        self.store = Store(bin_limit)
        self._min = float('+inf')
        self._max = float('-inf')
        self._count = 0
        self._sum = 0

    def __repr__(self):
        return "store: {{{}}}, count: {}, sum: {}, min: {}, max: {}".format(
            self.store, self._count, self._sum, self._min, self._max)

    @property
    def name(self):
        """Fixed display name of this sketch type."""
        return 'DDSketch'

    @property
    def num_values(self):
        """Number of values added so far."""
        return self._count

    @property
    def avg(self):
        """Mean of the added values; raises ZeroDivisionError when empty."""
        return float(self._sum) / self._count

    @property
    def sum(self):
        """Sum of the added values."""
        return self._sum

    def get_key(self, val):
        """Return the bucket key for ``val``.

        Values whose magnitude does not exceed ``min_value`` map to key 0;
        negative values get negative keys, positive values positive keys.
        """
        if val < -self.min_value:
            return -int(math.ceil(
                math.log(-val) / self.gamma_ln)) - self.offset
        elif val > self.min_value:
            return int(math.ceil(math.log(val) / self.gamma_ln)) + self.offset
        else:
            return 0

    def add(self, val):
        """Add a value to the sketch."""
        key = self.get_key(val)
        self.store.add(key)

        # Keep track of summary stats
        self._count += 1
        self._sum += val
        if val < self._min:
            self._min = val
        if val > self._max:
            self._max = val

    def quantile(self, q):
        """Return the estimated value at quantile ``q``.

        :param q: quantile in the closed interval [0, 1]
        :return: NaN when ``q`` is out of range or the sketch is empty; the
            exact minimum/maximum for ``q`` of 0/1; otherwise an estimate
            computed from the bucket key at the requested rank
        """
        if q < 0 or q > 1 or self._count == 0:
            # np.nan is spelled in lower case because the np.NaN alias was
            # removed in NumPy 2.0.
            return np.nan
        if q == 0:
            return self._min
        if q == 1:
            return self._max

        rank = int(q * (self._count - 1) + 1)
        key = self.store.key_at_rank(rank)
        if key < 0:
            key += self.offset
            quantile = -2 * pow(self.gamma, -key) / (1 + self.gamma)
        elif key > 0:
            key -= self.offset
            quantile = 2 * pow(self.gamma, key) / (1 + self.gamma)
        else:
            quantile = 0

        # Never report less than the smallest value actually seen.
        return max(quantile, self._min)

    def merge(self, sketch):
        """Fold ``sketch`` into this sketch.

        :raises UnequalSketchParametersException: if the two sketches were
            built with different ``gamma`` or ``min_value``
        """
        if not self.mergeable(sketch):
            raise UnequalSketchParametersException(
                "Cannot merge two DDSketches with different parameters")

        if sketch._count == 0:
            return

        if self._count == 0:
            self.copy(sketch)
            return

        # Merge the stores
        self.store.merge(sketch.store)

        # Merge summary stats
        self._count += sketch._count
        self._sum += sketch._sum
        if sketch._min < self._min:
            self._min = sketch._min
        if sketch._max > self._max:
            self._max = sketch._max

    def mergeable(self, other):
        """Two sketches can be merged only if their gamma and min_values are equal."""
        return self.gamma == other.gamma and self.min_value == other.min_value

    def copy(self, sketch):
        """Overwrite this sketch's store and summary stats with ``sketch``'s."""
        self.store.copy(sketch.store)
        self._min = sketch._min
        self._max = sketch._max
        self._count = sketch._count
        self._sum = sketch._sum
Example #8
0
class Encryptor(object):
    """
        The Encryptor object, handles encryption and decryption.

        Messages are zlib-compressed and encrypted with a one-off Fernet key.
        The key is stored through ``Store`` under the hash of the ciphertext.
        Ciphertexts can optionally be rendered as emoji.
    """
    def __init__(self):
        # Connection settings come from the environment.  os.environ.get()
        # returns None for a missing variable rather than raising, so no
        # exception handling is needed here.
        self.dbUser = os.environ.get("API_USER")
        self.dbPw = os.environ.get("API_PASSWORD")
        self.dbUrl = os.environ.get("API_URL")
        # Add store object to interact with the database
        self.st = Store(self.dbUrl, self.dbUser, self.dbPw)
        # Emoji converter
        self.conv = EmojiConverter("emojList.txt")

    def encrypt(self, message, emoji=False) -> str:
        """Compress and encrypt ``message`` and persist the key that was used.

        :param message: the text to protect
        :param emoji: when true, return the token converted to emoji
        :return: the encrypted token as text
        """
        # Use fernet to generate key
        key = Fernet.generate_key()
        f = Fernet(key)
        # Compress the intended message via zlib, then encrypt it
        encrypted = f.encrypt(zlib.compress(message.encode()))
        # Store the key under the hash of the encrypted string
        doc = {
            "hsh": generate_hash(encrypted),
            "key": key.decode(),
            "destroy": False,
            "compress": "zlib",
            "date": datetime.datetime.utcnow(),
        }
        self.st.add(doc)
        # Return the encrypted message
        encrypted = encrypted.decode()
        if emoji:
            encrypted = self.conv.sentence_to_emoji(encrypted)
        return encrypted

    def decrypt(self, encryptedMessage, destroy=False) -> str:
        """Decrypt a token produced by :meth:`encrypt`.

        :param encryptedMessage: the token as bytes, plain or emoji encoded
        :param destroy: look the key up with ``desFind`` instead of ``find``
        :return: the original text, or ``-1`` when no key record is found
        """
        # Convert an emoji-encoded token back first.  The emptiness check
        # keeps an empty message from raising IndexError on em[0].
        em = encryptedMessage.decode()
        if em and self.conv.is_emoji(em[0]):
            encryptedMessage = self.conv.emoji_to_sentence(em).encode()
        hsh = generate_hash(encryptedMessage)
        # Find the dataset in mongodb
        if destroy:
            d = self.st.desFind({"hsh": hsh})
        else:
            d = self.st.find({"hsh": hsh})
        if d is None:
            return -1
        # decrypt with key, then undo the compression
        f = Fernet(d["key"].encode())
        res = f.decrypt(encryptedMessage)
        return zlib.decompress(res).decode()

    def destroy(self, encryptedMessage) -> bool:
        """Run ``desFind`` for the token's key record.

        :return: True when a record was found, False otherwise
        """
        hsh = generate_hash(encryptedMessage)
        fnd = self.st.desFind({"hsh": hsh})
        # decrypt() treats None as "not found"; treat it the same way here
        # instead of reporting success for a missing record.
        return fnd is not None and fnd != {}
Example #9
0
class TestStore(unittest2.TestCase):
    """Tests for a ``Store`` holding triples: size, add/remove, pattern
    search and SPARQL queries."""

    def setUp(self):
        self.store = Store(name="scratch")
        self.ns = Namespace('http://example.com/#')

    def tearDown(self):
        self.store.close()

    def _alice(self):
        """Build the (alice, name, 'Alice') triple used by several tests."""
        return (self.ns['alice'], self.ns['name'], Literal('Alice'))

    def testSize(self):
        """ Tests the size of the repository """
        self.assertEqual(len(self.store), 0)

    def testAdd(self):
        """Adding one triple makes the store report one entry."""
        subject = self.ns['bob']
        predicate = self.ns['name']
        obj = Literal('Bob Bilbins')
        self.store.add((subject, predicate, obj))
        self.assertEqual(len(self.store), 1)

    def testRemove(self):
        """A triple that was added can be removed again."""
        alice = self._alice()
        self.store.add(alice)
        self.assertEqual(len(self.store), 1)
        self.store.remove(alice)
        self.assertEqual(len(self.store), 0)

    def testTriples(self):
        """ Tests the search by triple. """
        alice = self._alice()
        self.store.add(alice)
        pattern = (self.ns['alice'], None, None)
        for row in self.store.triples(pattern):
            for idx, expected in enumerate(alice):
                self.assertEqual(row[idx], expected)

    def testSimpleSparql(self):
        """A select-all query returns the stored triple."""
        alice = self._alice()
        self.store.add(alice)
        for row in self.store.query("SELECT ?s ?p ?o  WHERE {?s ?p ?o .}"):
            for idx, expected in enumerate(alice):
                self.assertEqual(row[idx], expected)

    def testNamespacedSparql(self):
        """A prefixed subject in the query selects only alice's data."""
        alice = self._alice()
        self.store.add(alice)
        self.store.add((self.ns['bob'], self.ns['name'], Literal('Bob')))
        rows = self.store.query("SELECT ?p ?o  WHERE { ex:alice ?p ?o .}",
                                initNs={'ex': self.ns})
        for row in rows:
            # The result columns are predicate and object only.
            for idx, expected in enumerate(alice[1:]):
                self.assertEqual(row[idx], expected)

    def testBindedSparql(self):
        """An initial binding for ?s selects only alice's data."""
        alice = self._alice()
        self.store.add(alice)
        self.store.add((self.ns['bob'], self.ns['name'], Literal('Bob')))
        rows = self.store.query("SELECT ?p ?o  WHERE { ?s ?p ?o .}",
                                initBindings={'s': self.ns['alice']})
        for row in rows:
            # The result columns are predicate and object only.
            for idx, expected in enumerate(alice[1:]):
                self.assertEqual(row[idx], expected)

    def testDataTypes(self):
        """Typed date literals compare in date order."""
        birth = Literal('2006-01-03', datatype=_XSD_NS.date)
        comp = Literal('2006-01-01', datatype=_XSD_NS.date)
        self.store.add((self.ns['alice'], self.ns['birthdate'], birth))
        rows = self.store.query("SELECT ?s ?p ?o  WHERE {?s ?p ?o .}")
        for _s, _p, _o in rows:
            self.assertLess(comp, birth)
Example #10
0
class BaseScraper(object):
    """ basic scraper framework for grabbing press releases

    Derived scrapers generally need to implement:
    name          - string name of the scraper
    doc_type      - numeric document type for uploaded press releases
    find_latest() - to grab a list of the latest press releases (usually
                    from an rss feed)
    extract()     - parse html data to pull out the various text and metadata
                    of the press release
    """
    def __init__(self):
        """ build the command line parser shared by all scrapers """

        # derived classes need to set these
        assert self.name is not None
        assert self.doc_type is not None

        self.parser = OptionParser(usage="%prog: [options]")
        self.parser.add_option('-v', '--verbose', action='store_true')
        self.parser.add_option('-d', '--debug', action='store_true')
        self.parser.add_option('-t', '--test', action='store_true', help="test only - don't send any documents to server")
        self.parser.add_option('-c', '--cache', action='store_true', help="cache all http transfers in .cache dir (for repeated runs during test)")
        self.parser.add_option('-u', '--url', nargs=1, help="process just the given URL")
        self.parser.add_option('-i', '--ini-file', default="churnalism.cfg", nargs=1, help="filename for connection settings [default: %default]")


    def main(self):
        """ set everything up, then invoke go()

        Parses the command line, configures logging, creates the store
        (a DummyStore for test runs, otherwise a Store configured from the
        ini file) and optionally installs the http cache.
        """

        (options, args) = self.parser.parse_args()

        log_level = logging.ERROR
        if options.debug:
            log_level = logging.DEBUG
        elif options.verbose:
            log_level = logging.INFO
        logging.basicConfig(level=log_level)    #, format='%(message)s')


        if options.test:
            self.store = DummyStore(self.name, self.doc_type)
        else:
            # load in config file for real run; the with-block makes sure
            # the file handle is closed once it has been parsed
            config = ConfigParser.ConfigParser()
            with open(options.ini_file) as ini_fp:
                config.readfp(ini_fp)
            auth_user = config.get("DEFAULT",'user')
            auth_pass = config.get("DEFAULT",'pass')
            server = config.get("DEFAULT",'server')

            self.store = Store(self.name, self.doc_type, auth_user=auth_user, auth_pass=auth_pass, server=server)


        if options.cache:
            logging.info("using .cache")
            opener = urllib2.build_opener(CacheHandler(".cache"))
            urllib2.install_opener(opener)

        self.go(options)


    def go(self,options):
        """ perform the actual scraping

        default implementation is to just call find_latest and process the discovered press releases.
        But it's likely derived classes will want to handle custom options for fetching historical
        data

        see prnewswire for an example.
        """


        if options.url:
            urls = [options.url,]
        else:
            urls = self.find_latest()

        self.process_batch(urls)


    def process_batch(self, urls):
        """ run through a list of urls, fetching, extracting and storing each in turn

        A failure on one url is logged and counted but does not stop the
        batch. The store is saved at the end even if the batch is aborted.
        """

        # cull out ones we've got
        n_before = len(urls)
        urls = [url for url in urls if not self.store.already_got(url)]
        logging.info("processing %d urls (%d are new)", n_before, len(urls))

        err_cnt = 0
        try:

            for url in urls:
                try:
                    logging.debug("fetch %s",url)
                    response = urllib2.urlopen(url)
                    html = response.read()
                    # TODO: maybe just skip ones which redirect to other domains?
                    if response.geturl() != url:
                        logging.warning("Redirect detected %s => %s",url,response.geturl())
                    press_release = self.extract(html, url)

                    # encode text fields
                    # TODO: use isinstance(...,unicode) instead
                    for f in ('url','title','source','text','location','language','topics'):
                        if f in press_release:
                            press_release[f] = press_release[f].encode('utf-8')
                    self.store.add(press_release)

                except Exception as e:
                    logging.error("failed on %s: %s %s",url,e.__class__,e)
                    err_cnt += 1

            # report the failure count instead of silently dropping it
            if err_cnt:
                logging.warning("%d of %d urls failed", err_cnt, len(urls))
        finally:
            self.store.save()


    def find_latest(self):
        """ obtain the list of "latest" press releases, whatever that means for a given target """
        return []


    def extract(self,html,url):
        """ extract a single downloaded press release

        Must be overridden by the derived class; the base implementation
        always raises NotImplementedError.
        """
        # an explicit raise still fires when asserts are stripped with -O
        raise NotImplementedError("extract() must be implemented in derived class")
Example #11
0
class OperateXls(object):
    """Keyword library for reading and writing single cells of excel files.

    An ``XlsHandler`` is registered in ``self.store`` for the duration of
    each call and unregistered again afterwards.
    """
    def __init__(self):
        self.store = Store()
        self._log = logging.getLogger(__name__)
        self._log.setLevel(logging.DEBUG)

    def read_excel_cell(self, source_file, sheet_name, x_cell, y_cell,
                        **kwargs):
        """read_excel_cell to read excel specific cell.

        +--------------------+------+-------------------------------------------+
        | Input Parameters   | Man. | Description                               |
        +--------------------+------+-------------------------------------------+
        | source_file        | yes  | Absolute path of excel file               |
        +--------------------+------+-------------------------------------------+
        | sheet_name         | yes  | sheet name in excel file                  |
        +--------------------+------+-------------------------------------------+
        | x_cell             | yes  | the number of row(start value is zero)    |
        +--------------------+------+-------------------------------------------+
        | y_cell             | yes  | the number of column(start value is zero) |
        +--------------------+------+-------------------------------------------+

        excel demo:

        +----+------+------+------+-----+
        |0   |A     |B     |C     |D    |
        +----+------+------+------+-----+
        |1   |0,0   |0,1   |0,2   |0,3  |
        +----+------+------+------+-----+
        |2   |1,0   |1,1   |1,2   |1,3  |
        +----+------+------+------+-----+
        |3   |2,0   |2,1   |2,2   |2,3  |
        +----+------+------+------+-----+
        |4   |3,0   |3,1   |3,2   |3,3  |
        +----+------+------+------+-----+

        Example:

        +--------------------+--------------+---------+----+----+
        |read_excel_cell     |/opt/xxx.xls  |Sheet1   |1   |2   |
        +--------------------+--------------+---------+----+----+

        Returns the content of the requested cell.
        """
        alias = kwargs.get('alias')
        xls = XlsHandler(source_file, sheet_name)
        self.store.add(xls, alias=alias)
        try:
            xls.open_excel()
            return xls.read_cell(x_cell, y_cell)
        finally:
            # Unregister the handler even when opening or reading fails.
            self.store.remove(alias=alias)

    def modify_excel_cell(self, source_file, sheet_name, x_cell, y_cell,
                          in_value, **kwargs):
        """modify_excel_cell to write or modify excel specific cell.
           make sure the excel file is closed when you use this keyword.

        +--------------------+------+-------------------------------------------+
        | Input Parameters   | Man. | Description                               |
        +--------------------+------+-------------------------------------------+
        | source_file        | yes  | Absolute path of excel file               |
        +--------------------+------+-------------------------------------------+
        | sheet_name         | yes  | sheet name in excel file                  |
        +--------------------+------+-------------------------------------------+
        | x_cell             | yes  | the number of row(start value is zero)    |
        +--------------------+------+-------------------------------------------+
        | y_cell             | yes  | the number of column(start value is zero) |
        +--------------------+------+-------------------------------------------+
        | in_value           | yes  | new value need to write or modify         |
        +--------------------+------+-------------------------------------------+

        excel demo:

        +----+------+------+------+-----+
        |0   |A     |B     |C     |D    |
        +----+------+------+------+-----+
        |1   |0,0   |0,1   |0,2   |0,3  |
        +----+------+------+------+-----+
        |2   |1,0   |1,1   |1,2   |1,3  |
        +----+------+------+------+-----+
        |3   |2,0   |2,1   |2,2   |2,3  |
        +----+------+------+------+-----+
        |4   |3,0   |3,1   |3,2   |3,3  |
        +----+------+------+------+-----+

        Example:

        +--------------------+--------------+--------+---+---+-----+
        |modify_excel_cell   |/opt/xxx.xls  |Sheet1  |1  |2  |333  |
        +--------------------+--------------+--------+---+---+-----+

        """
        alias = kwargs.get('alias')
        xls = XlsHandler(source_file, sheet_name)
        self.store.add(xls, alias=alias)
        try:
            # file operation: open the file if it exists, else create one
            if os.path.isfile(source_file):
                xls.open_excel()
            else:
                xls.create_excel()

            # sheet operation, if exist, get index, if not create one
            xls.get_sheet_index(sheet_name)

            # cell operation
            xls.write_cell(x_cell, y_cell, in_value)

            # modification save
            xls.save()
        finally:
            # Unregister the handler even when one of the steps above fails.
            self.store.remove(alias=alias)
Example #12
0
class OperateBin(object):
    """Keyword library for reading and patching tagged text in bin files.

    A ``BinHandler`` bound to the opened file is registered in
    ``self.store`` for the duration of each call.
    """
    def __init__(self):
        self.store = Store()
        self.file_object = ''
        self.bin_handler = BinHandler(self.file_object)

    def read_bin_file(self, bin_file, tag_name, **kwargs):
        """read_bin_file to read bin file's text by tag name.
          <SwVersion>5912129</SwVersion>
          tag_name: SwVersion
          text: 5912129

        +--------------------+------+-------------------------------+
        | Input Parameters   | Man. | Description                   |
        +--------------------+------+-------------------------------+
        | bin_file           | yes  | Path of bin file              |
        +--------------------+------+-------------------------------+
        | tag_name           | yes  | tag name in bin file <xxx>    |
        +--------------------+------+-------------------------------+

        Return: the tag's text, or the string 'NOT FOUND' when the lookup
        raises TAFileException.

        Example:

        +----------+---------------+--------------+------------+
        | ${value} | read_bin_file | /opt/xxx.bin | SwVersion  |
        +----------+---------------+--------------+------------+

        """
        alias = kwargs.get('alias')
        with open(bin_file, 'r+b') as self.file_object:
            self.bin_handler = BinHandler(self.file_object)
            self.store.add(self.bin_handler, alias=alias)
            try:
                value = self.bin_handler.find_tag_position(tag_name)
            except TAFileException:
                return 'NOT FOUND'
            finally:
                # Unregister on every path, including the 'NOT FOUND'
                # return, so the store never keeps a handler whose file
                # has already been closed.
                self.store.remove(alias=alias)
            return value[0][1].decode('utf-8')

    def modify_bin_file(self, bin_file, tag_name, new_value, **kwargs):
        """modify_bin_file to modify bin file's text by tag name,
           the new value should have the same length with old one.
           <SwVersion>5912129</SwVersion>
           tag_name: SwVersion
           text: 5912129

        +--------------------+------+------------------------------------------------+
        | Input Parameters   | Man. | Description                                    |
        +--------------------+------+------------------------------------------------+
        | bin_file           | yes  | Path of bin file                               |
        +--------------------+------+------------------------------------------------+
        | tag_name           | yes  | Tag name in bin file <xxx>                     |
        +--------------------+------+------------------------------------------------+
        | new_value          | yes  | New value should have same length with old one |
        +--------------------+------+------------------------------------------------+

        Return: None on success, or the string 'Fail to modify' when the
        replacement raises TAFileException.

        Example:

        +---------------------------------+--------------+-----------+---------+
        | modify_bin_file                 | /opt/xxx.bin | SwVersion | 5912128 |
        +---------------------------------+--------------+-----------+---------+

        """
        alias = kwargs.get('alias')
        with open(bin_file, 'r+b') as self.file_object:
            self.bin_handler = BinHandler(self.file_object)
            self.store.add(self.bin_handler, alias=alias)
            try:
                self.bin_handler.replace_text_in_bin_file(tag_name, new_value)
            except TAFileException:
                return 'Fail to modify'
            finally:
                # Unregister on every path, including the failure return.
                self.store.remove(alias=alias)
Example #13
0
class OperateXml(object):
    """Keywords to set up, tear down, edit and query nodes of an xml file.

    Every keyword accepts an optional ``alias`` keyword argument that selects
    the handler held in ``self.store``.
    """
    def __init__(self):
        self.store = Store()

    def setup_xml(self, src_file, save_path=None, **kwargs):
        """Prepare an xml file for editing and register its handler.

        :param string src_file: the source xml file
        :param string save_path: file name to save to; the source file name
            is used when this is empty or None
        """
        handler = XmlHandler()
        handler.setup(src_file, save_path or src_file)
        self.store.add(handler, alias=kwargs.get('alias'))

    def teardown_xml(self, **kwargs):
        """Tear down the handler selected by ``alias`` and unregister it."""
        alias = kwargs.get('alias')
        handler = self.store.get(alias)
        handler.teardown()
        self.store.remove(alias=alias)

    def modify_xml_text(self, *args, **kwargs):
        """Modify the text of one or more xml nodes.

        :param string args: such as .//managedObject[@class="LNBTS"]/p[@name="actDLCAggr"]:false
        """
        self.store.get(kwargs.get('alias')).modify_node_text(*args)

    def modify_xml_attribute(self, *args, **kwargs):
        r"""Modify an attribute of one or more xml nodes.

        :param string args: such as .//managedObject[@class\="NOKLTE:LNCEL_TDD"]:@version\=TL17_1610_01_1610_02
        """
        self.store.get(kwargs.get('alias')).modify_node_attribute(*args)

    def add_xml_node(self, *args, **kwargs):
        """Add one or more xml nodes.

        :param string args: such as .//managedObject[@class="LNBTS"]/list[@name="qciTab6"]/item:<p name="nbrDl">10240</p>
        """
        self.store.get(kwargs.get('alias')).add_node(*args)

    def delete_xml_node(self, *args, **kwargs):
        """Delete one or more xml nodes.

        :param string args: such as .//managedObject[@class="LNCEL"][@distName="0"]/list/item/p[@name="dFpucchF1b"]
        """
        self.store.get(kwargs.get('alias')).delete_node(*args)

    def read_xml_text(self, *args, **kwargs):
        """Return the text value of an xml node.

        :param string args: such as .//managedObject[@class="LNCEL"][@distName="0"]/list/item/p[@name="dFpucchF1b"]
        """
        handler = self.store.get(kwargs.get('alias'))
        return handler.get_node_text(*args)

    def read_xml_attribute(self, *args, **kwargs):
        r"""Return an attribute value of an xml node.

        :param string args: such as .//managedObject[@class\="NOKLTE:LNCEL_TDD"]:@version
        """
        handler = self.store.get(kwargs.get('alias'))
        return handler.get_node_attribute(*args)
Example #14
0
class FtpUploadDownload(object):
    """upload and download files

    Every transfer registers the handler it uses in ``self.store`` under the
    optional ``alias`` keyword and removes it again when the transfer ends,
    whether it succeeded or raised.
    """
    def __init__(self):
        self.ftp_handler = FtpHandler()
        self.sftp_handler = SFtpHandler()
        self.store = Store()

    def ftp_download(self, host, port, usr, pwd, remote, local=None, **kwargs):
        """ftp download file
        :param host: such as '192.168.255.1'
        :param port: such as 21
        :param usr: username such as 'admin'
        :param pwd: password such as 'admin'
        :param remote: file in host such as '/tmp/rat_psconfig.xml'
        :param local: file path to save in local such as 'example/config1/rat1.xml','example/config1/', 'rat1.xml', None
        :param alias in **kwargs: alias used to register the handler in the store
        :return: whatever the handler's ``ftp_download`` returns
        """
        alias = kwargs.get('alias')
        self.store.add(self.ftp_handler, alias=alias)
        try:
            handler = self.store.get(alias)
            handler.connect_ftp(host, port, usr, pwd)
            try:
                return handler.ftp_download(remote, local)
            finally:
                # Close the connection even when the transfer fails.
                handler.close_ftp()
        finally:
            # Never leave a stale alias behind in the store.
            self.store.remove(alias=alias)

    def ftp_upload(self, host, port, usr, pwd, local, remote=None, **kwargs):
        """ftp upload file
        :param host: such as '192.168.255.1'
        :param port: such as 21
        :param usr: username such as 'admin'
        :param pwd: password such as 'admin'
        :param local: file in local such as '/tmp/rat_psconfig.xml'
        :param remote: file path to save in host such as '/tmp/tmp1/rat_psconfig_test1.xml', '/tmp/tmp1/', 'rat_psconfig_test1.xml', None
        :param alias in **kwargs: alias used to register the handler in the store
        """
        alias = kwargs.get('alias')
        self.store.add(self.ftp_handler, alias=alias)
        try:
            handler = self.store.get(alias)
            handler.connect_ftp(host, port, usr, pwd)
            try:
                handler.ftp_upload(local, remote)
            finally:
                # Close the connection even when the transfer fails.
                handler.close_ftp()
        finally:
            self.store.remove(alias=alias)

    def sftp_download(self, host, port, usr, pwd, remote, local=None,
                      **kwargs):
        """sftp download file or dir recursively
        :param host: such as '192.168.255.1'
        :param port: such as 22
        :param usr: username such as 'admin'
        :param pwd: password such as 'admin'
        :param remote: file in host such as '/tmp/rat_psconfig.xml' or folder '/tmp/'(NOT '/tmp')
        :param local: file path to save in local such as 'example/config1/rat1.xml','example/config1/', 'rat1.xml', None
        :param alias in **kwargs: alias used to register the handler in the store
        :return: whatever the handler's ``sftp_download`` returns
        """
        alias = kwargs.get('alias')
        self.store.add(self.sftp_handler, alias=alias)
        try:
            return self.store.get(alias).sftp_download(
                host, port, usr, pwd, remote, local)
        finally:
            self.store.remove(alias=alias)

    def download_latest_file(self, host, port, usr, pwd, remote, local=None,
                             **kwargs):
        """sftp download file
        :param host: such as '192.168.255.1'
        :param port: such as 22
        :param usr: username such as 'admin'
        :param pwd: password such as 'admin'
        :param remote: path in host such as '/tmp/'
        :param local: file path to save in local such as 'example/config1/rat1.xml','example/config1/', 'rat1.xml', None
        :param filter in **kwargs: such as: filter = PM.*.xml
        :param alias in **kwargs: alias used to register the handler in the store
        :return: whatever the handler's ``sftp_download_latest_file`` returns
        """
        alias = kwargs.get('alias')
        self.store.add(self.sftp_handler, alias=alias)
        try:
            return self.store.get(alias).sftp_download_latest_file(
                host, port, usr, pwd, remote, local,
                filter=kwargs.get('filter'))
        finally:
            self.store.remove(alias=alias)

    def sftp_upload(self, host, port, usr, pwd, local, remote=None, **kwargs):
        """sftp upload file
        :param host: such as '192.168.255.1'
        :param port: such as 22
        :param usr: username such as 'admin'
        :param pwd: password such as 'admin'
        :param local: file in local such as '/tmp/rat_psconfig.xml'
        :param remote: file path to save in host such as '/tmp/tmp1/rat_psconfig_test1.xml', '/tmp/tmp1/', 'rat_psconfig_test1.xml', None
        :param alias in **kwargs: alias used to register the handler in the store
        """
        alias = kwargs.get('alias')
        self.store.add(self.sftp_handler, alias=alias)
        try:
            self.store.get(alias).sftp_upload(host, port, usr, pwd, local,
                                              remote)
        finally:
            self.store.remove(alias=alias)
Example #15
0
class BaseScraper(object):
    """ basic scraper framework for grabbing press releases

    Derived scrapers generally need to implement:
    name          - string name of the scraper
    doc_type      - numeric document type for uploaded press releases
    find_latest() - to grab a list of the latest press releases (usually
                    from an rss feed)
    extract()     - parse html data to pull out the various text and metadata
                    of the press release
    """

    # when true, skip urls that redirect to a different network location
    require_same_domain = False
    # when true, skip every url whose final url differs from the requested one
    disallow_redirects = False
    # headers sent with every request (extended per batch by extra_headers)
    headers = {}

    def __init__(self):
        """ build the command line parser shared by all scrapers """

        # derived classes need to set these
        assert self.name is not None
        assert self.doc_type is not None

        self.parser = OptionParser(usage="%prog: [options]")
        self.parser.add_option('-v', '--verbose', action='store_true')
        self.parser.add_option('-d', '--debug', action='store_true')
        self.parser.add_option(
            '-t',
            '--test',
            action='store_true',
            help="test only - don't send any documents to server")
        self.parser.add_option(
            '-c',
            '--cache',
            action='store_true',
            help=
            "cache all http transfers in .cache dir (for repeated runs during test)"
        )
        self.parser.add_option('-u',
                               '--url',
                               nargs=1,
                               help="process just the given URL")
        self.parser.add_option(
            '-i',
            '--ini-file',
            default="churnalism.cfg",
            nargs=1,
            help="filename for connection settings [default: %default]")

    def main(self):
        """ set everything up, then invoke go() """

        (options, args) = self.parser.parse_args()

        log_level = logging.ERROR
        if options.debug:
            log_level = logging.DEBUG
        elif options.verbose:
            log_level = logging.INFO
        logging.basicConfig(level=log_level)  #, format='%(message)s')

        if options.test:
            self.store = DummyStore(self.name, self.doc_type)
        else:
            # load in config file for real run
            config = ConfigParser.ConfigParser()
            # close the ini file as soon as it has been parsed
            with open(options.ini_file) as ini_file:
                config.readfp(ini_file)
            auth_user = config.get("DEFAULT", 'user')
            auth_pass = config.get("DEFAULT", 'pass')
            server = config.get("DEFAULT", 'server')

            self.store = Store(self.name,
                               self.doc_type,
                               auth_user=auth_user,
                               auth_pass=auth_pass,
                               server=server)

        self.go(options)

    def go(self, options):
        """ perform the actual scraping

        default implementation is to just call find_latest and process the discovered press releases.
        But it's likely derived classes will want to handle custom options for fetching historical
        data

        see prnewswire for an example.
        """

        if options.url:
            urls = [
                options.url,
            ]
        else:
            urls = self.find_latest()

        self.process_batch(urls)

    def process_batch(self, urls, extra_headers=None):
        """ run through a list of urls, fetching, extracting and storing each in turn

        urls          - urls to process; ones the store already has are skipped
        extra_headers - optional dict of headers added on top of self.headers

        A failure on one url is logged and counted and the batch carries on.
        The store is saved at the end even if the batch is interrupted.
        """

        # cull out ones we've got
        n_before = len(urls)
        urls = [url for url in urls if not self.store.already_got(url)]
        logging.info("processing %d urls (%d are new)", n_before, len(urls))

        err_cnt = 0
        try:

            for url in urls:
                try:
                    logging.debug("fetch %s", url)
                    headers = {}
                    headers.update(self.headers)
                    if extra_headers:
                        headers.update(extra_headers)
                    response = requests.get(url, headers=headers)

                    # TODO: maybe just skip ones which redirect to other domains?
                    if response.url != url:
                        if self.disallow_redirects:
                            logging.warning(
                                "Skipping %s because it redirected to %s", url,
                                response.url)
                            continue
                        elif self.require_same_domain:
                            orig_location = urlparse.urlparse(url)
                            new_location = urlparse.urlparse(response.url)
                            if orig_location.netloc != new_location.netloc:
                                logging.warning(
                                    "Skipping %s because it redirected to another domain: %s",
                                    url, response.url)
                                continue

                    press_release = self.extract(response.text, url)

                    # encode text fields
                    # TODO: use isinstance(...,unicode) instead
                    for f in ('url', 'title', 'source', 'text', 'location',
                              'language', 'topics'):
                        if f in press_release:
                            press_release[f] = press_release[f].encode('utf-8')
                    self.store.add(press_release)

                except Exception as e:
                    logging.error("failed on %s: %s %s", url, e.__class__, e)
                    # print_exc() writes the traceback itself and returns
                    # None, so its result must not be printed.
                    traceback.print_exc()
                    err_cnt += 1

            if err_cnt:
                logging.error("%d of %d urls failed", err_cnt, len(urls))
        finally:
            self.store.save()

    def find_latest(self):
        """ obtain the list of "latest" press releases, whatever that means for a given target """
        return []

    def extract(self, html, url):
        """ extract a single downloaded press release

        Must be overridden. process_batch() expects a dict-like result that
        may hold the text fields it encodes ('url', 'title', 'source', ...).
        """
        # an explicit raise survives "python -O", unlike "assert False"
        raise NotImplementedError("need to implement in derived class!")
Example #16
0
class TestStore(unittest2.TestCase):
    """Exercise add/remove/len, triple search and SPARQL queries on a Store."""

    def setUp(self):
        self.store = Store(name="scratch")
        self.ns = Namespace('http://example.com/#')

    def tearDown(self):
        self.store.close()

    def testSize(self):
        """ Tests the size of the repository """
        self.assertEqual(len(self.store), 0)

    def testAdd(self):
        subject = self.ns['bob']
        predicate = self.ns['name']
        obj = Literal('Bob Bilbins')
        self.store.add((subject, predicate, obj))
        self.assertEqual(len(self.store), 1)

    def testRemove(self):
        alice = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(alice)
        self.assertEqual(len(self.store), 1)
        self.store.remove(alice)
        self.assertEqual(len(self.store), 0)

    def testTriples(self):
        """ Tests the search by triple. """
        alice = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(alice)
        pattern = (self.ns['alice'], None, None)
        for found in self.store.triples(pattern):
            # compare subject, predicate and object position by position
            for pos in (0, 1, 2):
                self.assertEqual(found[pos], alice[pos])

    def testSimpleSparql(self):
        alice = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(alice)
        rows = self.store.query("SELECT ?s ?p ?o  WHERE {?s ?p ?o .}")
        for row in rows:
            for pos in (0, 1, 2):
                self.assertEqual(row[pos], alice[pos])

    def testNamespacedSparql(self):
        alice = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(alice)
        self.store.add((self.ns['bob'], self.ns['name'], Literal('Bob')))
        rows = self.store.query("SELECT ?p ?o  WHERE { ex:alice ?p ?o .}",
                                initNs={'ex': self.ns})
        for row in rows:
            # rows hold (?p, ?o), i.e. the triple shifted by one position
            for pos in (1, 2):
                self.assertEqual(row[pos - 1], alice[pos])

    def testBindedSparql(self):
        alice = (self.ns['alice'], self.ns['name'], Literal('Alice'))
        self.store.add(alice)
        self.store.add((self.ns['bob'], self.ns['name'], Literal('Bob')))
        rows = self.store.query("SELECT ?p ?o  WHERE { ?s ?p ?o .}",
                                initBindings={'s': self.ns['alice']})
        for row in rows:
            # rows hold (?p, ?o), i.e. the triple shifted by one position
            for pos in (1, 2):
                self.assertEqual(row[pos - 1], alice[pos])

    def testDataTypes(self):
        birthdate = Literal('2006-01-03', datatype=_XSD_NS.date)
        earlier = Literal('2006-01-01', datatype=_XSD_NS.date)
        self.store.add((self.ns['alice'], self.ns['birthdate'], birthdate))
        rows = self.store.query("SELECT ?s ?p ?o  WHERE {?s ?p ?o .}")
        # NOTE(review): the assertion compares the two local literals, not
        # the values returned by the query.
        for _s, _p, _o in rows:
            self.assertLess(earlier, birthdate)
Example #17
0
class KeyboardHandler:
    """Keyboard hook that recognises the double-key commands 'ee' and 'aa'.

    ``store`` records the keys pressed, ``state`` tracks the current mode and
    ``shouldRun`` keeps the message loop in start() alive.
    """

    def __init__(self):
        self.shouldRun = True
        self.store = Store()
        self.state = State()

    def OnKeyboardEvent(self, event):
        """Handle one key-down event.

        :param event: hook event; only its ``Ascii`` attribute is read
        :return: False to capture (swallow) the key, True to let it through
        """
        keyPressed = chr(event.Ascii)
        print('Ascii:', event.Ascii, keyPressed)
        # Read the previous key before recording the current one.
        lastPress = self.store.lastPress()
        self.store.add(keyPressed)

        if self.state.isStarting():
            if keyPressed == 'e' and lastPress != 'e':
                # Prepare to exit, capture 'e'
                return False

            if keyPressed != 'e' and lastPress == 'e':
                # Exit sequence aborted, release 'e'
                SendKeypress(0x12)

            if keyPressed == 'e' and lastPress == 'e':
                # Run 'ee' exit command
                self.store.clear()
                self.shouldRun = False
                return False

            if keyPressed == 'a' and lastPress != 'a':
                # Prepare to enter left click mode, capture 'a'
                return False

            if keyPressed != 'a' and lastPress == 'a':
                # Left click sequence aborted, release 'a'
                SendKeypress(0x1E)

            if keyPressed == 'a' and lastPress == 'a':
                # Enter 'aa' left click mode
                self.store.clear()
                self.state.enterMouseMode('leftClick')
                return False

        if self.state.isPositioning():
            # Placeholder
            if keyPressed == 'a':
                self.store.clear()
                self.state.clear()
                return False

        if event.Ascii == 27:
            # Reset if 'esc' key entered
            self.store.clear()
            self.state.clear()

        return True

    def start(self):
        """Install the keyboard hook and pump messages until shouldRun is False.

        The hook is removed again when the loop ends, including on error.
        """
        import time

        hookManager = pyHook.HookManager()
        hookManager.KeyDown = self.OnKeyboardEvent
        hookManager.HookKeyboard()

        try:
            while self.shouldRun:
                pythoncom.PumpWaitingMessages()
                # PumpWaitingMessages() does not block, so pause briefly
                # instead of spinning a CPU core at 100%.
                time.sleep(0.01)
        finally:
            hookManager.UnhookKeyboard()