def execute(self, quals, columns, **kwargs):
        """
        This method is invoked every time a SELECT is executed
        on the foreign table.

        Parses the quals argument searching for search criteria,
        contacts soundcloud using the official soundcloud python library
        and returns the search results inside the columns of the foreign table.

        Available columns are: title, url, search.

        :param list quals: a list of conditions which are used
          in the WHERE part of the select and can be used
          to restrict the number of results
        :param list columns: the columns of the foreign table
        """
        if not quals:
            msg = 'specify a search term'
            log_to_postgres(level=ERROR, message=msg)
        # create a client object using the apikey
        client = soundcloud.Client(client_id=self.apikey)
        for qual in quals:
            # Use the qual value as the search term when the field is 'search'
            if qual.field_name == "search" and qual.operator == "=":
                # Perform a simple search using the qual value
                # and the soundcloud client (limit to 10 results)
                tracks = client.get('/tracks', q=qual.value, limit=10)
                for track in tracks:
                    # Format the response line
                    line = {
                        'title': track.title,
                        'url': track.permalink_url,
                        'search': qual.value
                    }
                    yield line
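For context, a hedged sketch of what this method receives for a typical query (the foreign table name is hypothetical; Qual is multicorn's qual class):

        # SELECT title, url FROM soundcloud_tracks WHERE search = 'ambient';
        # arrives here roughly as:
        #   quals   = [Qual('search', '=', 'ambient')]
        #   columns = ['title', 'url']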
    def get_token(self):
        # Do OAuth username/password
        token_url = '%s/services/oauth2/token' % self.login_server

        params = urllib.urlencode({
          'grant_type': 'password',
          'client_id': self.client_id,
          'client_secret': self.client_secret,
          'username': self.username,
          'password': self.password
        })

        log_to_postgres('Getting token from %s' % token_url, DEBUG)

        try:
            data = urllib2.urlopen(token_url, params).read()
        except urllib2.URLError, e:
            if hasattr(e, 'code'):
                if e.code == 400:
                    log_to_postgres(
                        'Bad Request', ERROR, 
                        'Check the client_id, client_secret, username and password')
                else:
                    log_to_postgres('HTTP status %d' % e.code, ERROR)
            elif hasattr(e, 'reason'):
                log_to_postgres('Error posting to URL %s: %d %s' % 
                                (token_url, e.reason[0], e.reason[1]), ERROR,
                                'Check the login_server')
            else:
                log_to_postgres('Unknown error %s' % e, ERROR)
Example #3
def to_sargs(quals):
	log_to_postgres(str(quals), WARNING)
	good_quals = ['=', '>', '>=', '<=', '<>', ('=', True), ('<>',  False)]
	converted = [to_sarg(q) for q in quals if q.operator in good_quals]
	sargs = " and " .join(["(%s)" % a[0] for a in converted if a])
	params = [a[1] for a in converted if a]
	return (sargs, params)
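The to_sarg helper this function calls is not included in the listing; a minimal sketch consistent with how its result is consumed (each qual becomes a (fragment, parameter) pair, or None to skip it):

def to_sarg(qual):
	# Hypothetical helper: render one qual as a parameterized SQL
	# fragment plus its bind value; to_sargs joins the fragments
	# and collects the parameters.
	if not isinstance(qual.operator, str):
		return None  # tuple operators (ANY/ALL quals) left out of this sketch
	return ('`%s` %s %%s' % (qual.field_name, qual.operator), qual.value)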
    def __init__(self, options, columns):
        super(DatabaseDotComForeignDataWrapper, self).__init__(options, columns)
        self.column_map = CaseInsensitiveDict(dict([(x, x) for x in columns]))

        self.obj_type = options.get('obj_type', None)
        if self.obj_type is None:
            log_to_postgres('You MUST set the obj_type', ERROR)
        self.client_id = options.get('client_id', None)
        if self.client_id is None:
            log_to_postgres('You MUST set the client_id', ERROR)
        self.client_secret = options.get('client_secret', None)
        if self.client_secret is None:
            log_to_postgres('You MUST set the client_secret', ERROR)
        self.username = options.get('username', None)
        if self.username is None:
            log_to_postgres('You MUST set the username', ERROR)
        self.password = options.get('password', None)
        if self.password is None:
            log_to_postgres('You MUST set the password', ERROR)
        self.login_server = options.get('login_server', 'https://login.salesforce.com')

        self.oauth = self.get_token()
    def __init__(self, options, columns):
        """
        Init method for the Foreign Data Wrapper.

        Used to manage the options necessary to run barman

        :type options: Options passed during the creation of the FDW
        :type columns: the columns of the foreign table
        """
        super(BarmanEnhancedForeignDataWrapper, self).__init__(options,
                                                               columns)

        if 'table_name' not in options:
            log_to_postgres('The table_name parameter is required', ERROR)
        if 'barman_user' not in options:
            log_to_postgres('The barman_user parameter is required', ERROR)
        if 'barman_host' not in options:
            log_to_postgres('Option barman_host is required for '
                            'the Barman FDW setup.', ERROR)

        self.schema = options['schema'] if 'schema' in options else None
        self.table_name = options['table_name']
        self.barman_host = options['barman_host']
        self.barman_user = options['barman_user']

        self._row_id_column = 'server'

        # The columns we'll be using (defaults to 'all'):
        self.columns = columns

        log_to_postgres('Barman FDW Config options:  %s' % options, DEBUG)
        log_to_postgres('Barman FDW Config columns:  %s' % columns, DEBUG)
Example #6
    def execute(self, quals, columns, sortkeys=None):

        log_to_postgres('Query Columns:  %s' % columns, DEBUG)
        log_to_postgres('Query Filters:  %s' % quals, DEBUG)

        statement = self.make_cypher(quals, columns, sortkeys)

        params = {}
        for qual in quals:
            params[unicode(qual.field_name)] = self.convert_to_neo4j(self.columns[qual.field_name], qual.value)

        log_to_postgres('Neo4j query is : ' + unicode(statement), DEBUG)
        log_to_postgres('With params : ' + unicode(params), DEBUG)

        # Execute & retrieve neo4j data
        session = self.driver.session()
        try:
            for record in session.run(statement, params):
                line = {}
                for column_name in columns:
                    # TODO: from neo4j type to pg types
                    line[column_name] = self.convert_to_pg(self.columns[column_name], record[column_name])
                log_to_postgres("sending result item to PG : " + unicode(line),  DEBUG)
                yield line
        except CypherError:
            raise RuntimeError("Bad cypher query : " + statement)
        finally:
            session.close()
 def execute(self, quals, columns):
     if self.query:
         statement = self.query
     else:
         statement = "SELECT " + ",".join(self.columns.keys()) + " FROM " + self.table
     
     log_to_postgres('Hive query: ' + unicode(statement), DEBUG)
     
     try:
         transport = TSocket.TSocket(self.host, self.port)
         transport = TTransport.TBufferedTransport(transport)
         protocol = TBinaryProtocol.TBinaryProtocol(transport)
         client = ThriftHive.Client(protocol)
         transport.open()
         
         client.execute(statement)
         
         for row in client.fetchAll():
             line = {}
             cols = row.split("\t")
             for idx, column_name in enumerate(self.columns):
                 line[column_name] = cols[idx]
             yield line
             
     except Thrift.TException, tx:
         log_to_postgres(tx.message, ERROR)
Example #8
    def execute(self, quals, columns):
        conn = boto.connect_s3(self.aws_access_key, self.aws_secret_key)
        bucket = conn.get_bucket(self.bucket)

        stream = StringIO()
        key = bucket.get_key(self.filename)
        key.get_contents_to_file(stream)
        stream.seek(0)

        reader = csv.reader(stream, delimiter=self.delimiter, quotechar=self.quotechar)
        count = 0
        checked = False
        for line in reader:
            if count >= self.skip_header:
                if not checked:
                    # On first iteration, check if the lines are of the
                    # appropriate length
                    checked = True
                    if len(line) > len(self.columns):
                        log_to_postgres("There are more columns than "
                                        "defined in the table", WARNING)
                    if len(line) < len(self.columns):
                        log_to_postgres("There are fewer columns than "
                                        "defined in the table", WARNING)
                row = line[:len(self.columns)]
                nulled_row = [v if v else None for v in row]
                yield nulled_row
            count += 1
Example #9
	def execute(self, quals, columns):
		line = {}

		for qual in quals:
			if qual.field_name == "fn_name":
				self.fn_name = qual.value
			elif qual.field_name == "cmd":
				self.cmd = qual.value

		if self.fn_name == "exec":
			try:
				res = commands.getstatusoutput(self.cmd)
				line["fn_name"] = self.fn_name
				line["val"] = ""
				line["result"] = res
				line["cmd"] = self.cmd
				yield line
			except Exception as e:
				line["fn_name"] = self.fn_name
				line["val"] = ""
				line["result"] = "Error %s" % e
				yield line
				log_to_postgres("There was an error executing the docker command: %s" % e, ERROR, "Check your command for errors")
Example #10
    def __init__(self, options, columns):

        # Calling super constructor
        super(Neo4jForeignDataWrapper, self).__init__(options, columns)

        # Managed server option
        if 'url' not in options:
            log_to_postgres('The url parameter was not set; using the default "bolt://localhost:7687"', WARNING)
        self.url = options.get("url", "bolt://localhost:7687")

        if 'user' not in options:
            log_to_postgres('The user parameter is required', ERROR)
        self.user = options.get("user", "neo4j")

        if 'password' not in options:
            log_to_postgres('The password parameter is required for Neo4j', ERROR)
        self.password = options.get("password", "")

        if 'cypher' not in options:
            log_to_postgres('The cypher parameter is required', ERROR)
        self.cypher = options.get("cypher", "MATCH (n) RETURN n LIMIT 100")

        # Setting table columns
        self.columns = columns

        # Create a neo4j driver instance
        self.driver = GraphDatabase.driver(self.url, auth=basic_auth(self.user, self.password))

        self.columns_stat = self.compute_columns_stat()
        self.table_stat = int(options.get("estimated_rows", -1))
        if self.table_stat < 0:
            self.table_stat = self.compute_table_stat()
        log_to_postgres('Table estimated rows : ' + unicode(self.table_stat), DEBUG)
    def connect(self):
        # try to connect
        try:
            bucket = self.client.bucket(self.bucket)

        except Exception, e:
            log_to_postgres('Connection Failure:  %s' % e, ERROR)
Example #12
    def execute(self, quals, columns, **kwargs):
        """
        This method is invoked every time a SELECT is executed
        on the foreign table.

        :param list quals: a list of conditions which are used
          in the WHERE part of the select and can be used
          to restrict the number of results
        :param list columns: the columns of the foreign table
        """
        # Run "barman diagnose" on the Barman host over SSH.
        # Ports are handled in ~/.ssh/config since we use OpenSSH
        diagnose_cmd = "barman diagnose"
        ssh_cmd = "%s@%s" % (self.barman_user,
                             self.barman_host)
        ssh = subprocess.Popen(["ssh", "%s" % ssh_cmd, diagnose_cmd],
                               shell=False,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        output = ssh.communicate()
        result = json.loads(output[0])
        if output[1]:
            # communicate() has already drained stderr, so log its capture
            # rather than reading ssh.stderr again
            log_to_postgres("ERROR: %s" % output[1], DEBUG)
        else:
            servers = result['servers']
            for server, values in servers.items():
                line = {
                    'server': server,
                    'backups': len(values['backups']),
                    'description': values['config']['description'],
                    'config': json.dumps(values['config'])
                }
                yield line
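The minimal shape of the `barman diagnose` output this method relies on, reconstructed from the accesses above (server name and values are placeholders):

        # {
        #   "servers": {
        #     "main": {
        #       "backups": {"20240101T000000": {...}},
        #       "config": {"description": "Main PG server", ...}
        #     }
        #   }
        # }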
    def execute(self, quals, columns, retry = True):

        cols = ','.join(columns)

        where = ''
        parameters = []
        for qual in quals:
            operator = 'LIKE' if qual.operator == '~~' else qual.operator
            if qual.value is None:
                where += ' AND %s %s NULL' % (
                    qual.field_name, operator
                )
            else:
                where += ' AND %s %s \'%s\'' % (
                    qual.field_name, operator, qual.value
                )
        where = where[5:]

        query = 'SELECT '+cols+' FROM '+self.obj_type
        if len(where) > 0:
            query += ' WHERE %s ' % where

        log_to_postgres('SOQL query is %s' % query)

        params = urllib.urlencode({
          'q': query.encode('utf8')
        })

        query_url = (self.oauth['instance_url'] + '/services/data/' + self.api_version
            + '/query?%s' % params)

        headers = {
          'Authorization': 'OAuth %s' % self.oauth['access_token']
        }

        req = urllib2.Request(query_url, None, headers)

        queue = Queue()

        try:
            stream = urllib2.urlopen(req)
        except urllib2.URLError, e:
            if hasattr(e, 'code'):
                if e.code == 401 and retry:
                    log_to_postgres('Invalid token %s - trying refresh' %
                                    self.oauth['access_token'])
                    self.oauth = self.get_token()
                    for line in self.execute(quals, columns, False):
                        yield line
                    return
                else:
                    log_to_postgres('HTTP status %d' % e.code, ERROR)
            elif hasattr(e, 'reason'):
                log_to_postgres('Error posting to URL %s: %d %s' %
                                (query_url, e.reason[0], e.reason[1]), ERROR)
            else:
                log_to_postgres('Unknown error %s' % e, ERROR)
Example #14
 def _report_pk_violation(self, item):
     keys = sorted(item.keys())
     values = [item[key] for key in keys]
     log_to_postgres("Duplicate key value violates filesystem"
                     " integrity.",
                     detail="Key (%s)=(%s) already exists" %
                     (', '.join(keys), ', '.join(values)), level=ERROR)
Example #15
    def __init__(self, options, columns):
        """
        Initialize with options passed through the create foreign table
        statement
        """
        super(EchoPulse, self).__init__(options, columns)
        # Resolve data files found in directory
        self.basedir = options['directory']
        sources = (source for source in os.listdir(self.basedir)
                   if subtree_pattern.match(source))
        self.source_dirs = [
            os.path.realpath(os.path.join(self.basedir, source))
            for source in sources
            if os.path.isdir(os.path.join(self.basedir, source))
        ]
        # default mapping for coordinates
        self.new_dimnames = {
            'range': 'x',
            'theta': 'y',
            'phi': 'z'
        }
        # get custom mapping given in options
        varmapping = [
            opt for opt in options.keys()
            if opt.startswith('map_')
        ]
        for var in varmapping:
            # str.strip removes a set of characters, not a prefix,
            # so slice off the literal 'map_' instead
            self.new_dimnames.update({var[len('map_'):]: options[var]})

        # get pointcloud structure from the directory tree
        self.ordered_dims = self.scan_structure()

        log_to_postgres('{} echo/pulse directories linked'
                        .format(len(self.source_dirs)))
    def execute(self, quals, columns):
        if self.query:
            statement = self.query
        else:
            statement = "SELECT " + ",".join(self.columns.keys()) + " FROM " + self.table
        
        log_to_postgres('Kylin query: ' + unicode(statement), DEBUG)
        
        try:
            endpoint = 'http://{}:{}/kylin/api'.format(self.host, self.port)
            client = pykylin.connect(username=self.user, password=self.password,
                                     endpoint=endpoint, project=self.project,
                                     limit=self.limit)

            cursor = client.cursor()

            cursor.execute(statement)

            for row in cursor.fetchall():
                line = {}
                for idx, column_name in enumerate(self.columns):
                    line[column_name] = row[idx]
                yield line
                
        except NotImplementedError, ix:
            log_to_postgres(ix.message, ERROR)
Example #17
	def convert_coltype(self, col):
		_type_map = {
			"BOOLEAN_TYPE": "boolean",
			"TINYINT_TYPE": "smallint",
			"SMALLINT_TYPE": "smallint",
			"INT_TYPE": "int",
			"BIGINT_TYPE": "bigint",
			"FLOAT_TYPE": "float4",
			"DOUBLE_TYPE": "float8",
			"STRING_TYPE": "text",
			"TIMESTAMP_TYPE": "timestamp",
			"BINARY_TYPE": "bytea",
			"ARRAY_TYPE": "json",
			"MAP_TYPE": "json",
			"STRUCT_TYPE": "json",
			# "UNIONTYPE_TYPE": "",
			"DECIMAL_TYPE": "numeric",
			# "NULL_TYPE": "",
			"DATE_TYPE": "date",
			"VARCHAR_TYPE": "varchar",
			"CHAR_TYPE": "char",
			# "INTERVAL_YEAR_MONTH_TYPE": "",
			# "INTERVAL_DAY_TIME_TYPE": "",
		}
		(name, _type, size, _, precision, scale, _) = col
		if (_type in _type_map):
			_type = _type_map[_type]
			if (size):
				_type += "(%d)" % size
			if (precision):
				_type += "(%d,%d)" % (precision, scale)
			return ColumnDefinition(name, type_name=_type) 
		else:
			log_to_postgres('Cannot handle type %s' % _type)
Example #18
	def build_query(self, quals, columns, sortkeys=None):
		source = self.options["table"]
		query = "select %s from `%s` " % ( ",".join(map(lambda a : '`%s`' % a, columns)), source)
		sargs, params = to_sargs(quals)
		if (sargs):
			query += " where %s" % (sargs) 
		log_to_postgres(query, WARNING)
		return query, params
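For illustration (a hypothetical table and qual, consistent with the to_sarg sketch above):

	# build_query([Qual('id', '=', 3)], ['id', 'name'])
	#   -> ("select `id`,`name` from `t` where (`id` = %s)", [3])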
Example #19
	def __init__(self, fdw_options, fdw_columns):
		super(LlapFdw, self).__init__(fdw_options, fdw_columns)
		required_params = {'table', 'schema', 'hostname'}
		for p in required_params:
			if p not in fdw_options:
				log_to_postgres('The %s parameter is required' % p, ERROR)
		self.conn = LlapConnection(**fdw_options)
		self.cols = fdw_columns
		self.options = fdw_options
Example #20
 def get_path_keys(self):
     """
     This method must return a list of tuple of the form (column_name, expected_number_of_row).
     The expected_number_of_row must be computed as if a where column_name = some_value filter were applied.
     This helps the planner to estimate parameterized paths cost, and change the plan accordingly.
     For example, informing the planner that a filter on a column may return exactly one row, instead of the full billion, may help it on deciding to use a nested-loop instead of a full sequential scan.
     """
     log_to_postgres('get_path_keys is called', DEBUG)
     return self.columns_stat
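As an illustration, a static implementation for a hypothetical table with a unique id column and a low-cardinality status column might look like:

 def get_path_keys(self):
     # 'id' is unique, so an equality filter yields ~1 row;
     # 'status' has few distinct values, ~500 rows per value.
     return [(('id',), 1), (('status',), 500)]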
Example #21
 def parse_scope(self, scope=None):
     if scope in (None, "", "one"):
         return ldap3.SEARCH_SCOPE_SINGLE_LEVEL
     elif scope == "sub":
         return ldap3.SEARCH_SCOPE_WHOLE_SUBTREE
     elif scope == "base":
         return ldap3.SEARCH_SCOPE_BASE_OBJECT
     else:
         log_to_postgres("Invalid scope specified: %s" % scope, ERROR)
Example #22
 def parse_scope(self, scope=None):
     if scope in (None, "", "one"):
         return ldap.SCOPE_ONELEVEL
     elif scope == "sub":
         return ldap.SCOPE_SUBTREE
     elif scope == "base":
         return ldap.SCOPE_BASE
     else:
         log_to_postgres("Invalid scope specified: %s" % scope, ERROR)
Example #23
 def get_rel_size(self, quals, columns):
     """
     This method must return a tuple of the form (expected_number_of_row, expected_mean_width_of_a_row (in bytes)).
     The quals and columns arguments can be used to compute those estimates.
     For example, the imapfdw computes a huge width whenever the payload column is requested.
     """
     log_to_postgres('get_rel_size is called', DEBUG)
     # TODO: take the min of the columns stat based on the quals ?
     return (self.table_stat, len(columns)*100)
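A hedged sketch of the width heuristic mentioned in the docstring (the payload column name is hypothetical):

 def get_rel_size(self, quals, columns):
     width = len(columns) * 100
     if 'payload' in columns:
         width += 1000000  # a large blob column dominates the row width
     return (self.table_stat, width)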
    def __init__(self, options, columns):
        super(AccessLogFDW, self).__init__(options, columns)
        self._access_log_path = '/home/vagrant/access_log.gz'

        log_to_postgres("caching row data")
        self._rows = list(get_rows(self._access_log_path))
        self._rows_by_error = defaultdict(list)
        for row in self._rows:
            self._rows_by_error[row['error']].append(row)
    def _run_rethinkdb_action(self, action):
        # try to connect
        try:
            conn = r.connect(host=self.host, port=self.port, db=self.database, auth_key=self.auth_key)
        except Exception, e:
            log_to_postgres('Connection Failure:  %s' % e, ERROR)
    def delete(self, _key):

        log_to_postgres('Delete Request - id:  %s' % _key, DEBUG)

        (client, bucket) = self.connect()

        try:
            bucket.delete(_key)
        except Exception, ex:
            log_to_postgres('Riak error:  %s' % ex, ERROR)
Example #27
 def __init__(self, options, columns):
     super(PartitionMsgpackForeignDataWrapper, self).__init__(options, columns)
     self.columns = columns
     if 'filename' not in options:
         log_to_postgres(
             'Filename is required option of the partition msgpack fdw.',
             ERROR,
             hint='Try adding the missing option in the table creation statement')  # FIXME:
         raise RuntimeError('filename is required option of the partition msgpack fdw.')
     self.filename = options['filename']
Example #28
 def __init__(self, options, columns):
     super(DynamoFdw, self).__init__(options, columns)
     self.columns = columns
     try:
         self.aws_access_key_id = options['aws_access_key_id']
         self.aws_secret_access_key = options['aws_secret_access_key']
         self.aws_region = options['aws_region']
         self.remote_table = options['remote_table']
     except KeyError:
         log_to_postgres("You must specify these options when creating the FDW: "
                         "aws_access_key_id, aws_secret_access_key, aws_region, remote_table", ERROR)
Example #29
    def execute(self, quals, columns):
        log_to_postgres("call execute()", DEBUG)

        for i in range(0, self.limit):
            line = {}

            for column in columns:
                line[column] = self.columns[column]()

            yield line
Example #30
    def generate_condition(self, qual, cypher_variable=None):
        """
        Build a neo4j condition from a qual
        """

        condition = ''

        if cypher_variable is not None:
            query_param_name = cypher_variable
        else:
            query_param_name = qual.field_name

        # quals is a list with ANY
        if qual.list_any_or_all == ANY:
            values = [
                '( %s )' % self.generate_condition(Qual(qual.field_name, qual.operator[0], value), '`' + unicode(query_param_name) + '`' + '[' + unicode(array_index) + ']')
                for array_index, value in enumerate(qual.value)
            ]
            condition = ' ( ' + ' OR '.join(values) + ' ) '

        # quals is a list with ALL
        elif qual.list_any_or_all == ALL:
            conditions = [
                self.generate_condition(Qual(qual.field_name, qual.operator[0], value), '`' + unicode(query_param_name) + '`' + '[' + unicode(array_index) + ']')
                for array_index, value in enumerate(qual.value)
            ]
            condition = ' ( ' + ' AND '.join(conditions) + ' ) '

        # quals is just a string
        else:
            if qual.operator in ('~~', '!~~', '~~*', '!~~*'):
                # Convert to cypher regex
                regex = qual.value.replace('%', '.*')

                # For the negation, we prefix with NOT
                if qual.operator in ('!~~', '!~~*'):
                    condition += ' NOT '

                # Adding the variable name
                condition += qual.field_name + " =~ '"

                # If it's a ILIKE, we prefix regex with (?i)
                if qual.operator in ('~~*', '!~~*'):
                    condition += '(?i)'

                # We add the regex
                condition += regex + "' "

            else:
                if query_param_name.startswith('`'):
                    condition = qual.field_name + qual.operator + "$" + unicode(query_param_name)
                else:
                    condition = qual.field_name + qual.operator + "$`" + unicode(query_param_name) + '`'

        log_to_postgres('Condition is : ' + unicode(condition), DEBUG)
        return condition
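Two illustrative quals and the conditions they produce, traced from the branches above (field names and values are hypothetical):

        # Qual('name', '~~*', 'Ab%')  ->  "name =~ '(?i)Ab.*' "
        # Qual('age', '>', 18)        ->  "age>$`age`"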
Example #31
    def execute(self, quals, columns):
        intrusion_set_list = []
        conn_string = "host=127.0.0.1 port=5555 dbname=ctias user=ctias password=citas"
        query = "SELECT unnest(hash_list) FROM threat_miner_indicator"
        report_api = "http://www.threatcrowd.org/searchApi/v2/file/report"
        try:
            conn = ag.connect(conn_string)
            cur = conn.cursor()

            cur.execute(query)
            while True:
                records = cur.fetchall()
                if not records:
                    break

                for i in range(0, len(records)):
                    line = dict()
                    #intrusion_set_id = records[i][0]
                    #intrusion_set_name = records[i][1]
                    indicator_hash = records[i][0]
                    reports = json.loads(
                        requests.get(report_api, {
                            "resource": indicator_hash
                        }).text)
                    if (reports['response_code'] == '1'):
                        for column_name in self.columns:
                            if (column_name == 'md5'):
                                line[column_name] = reports[column_name]
                            elif (column_name == 'sha1'):
                                line[column_name] = reports[column_name]
                            elif (column_name == 'filename'):
                                line[column_name] = reports['scans']
                            elif (column_name == 'exploit_ip'):
                                line[column_name] = reports['ips']
                            elif (column_name == 'exploit_domain'):
                                line[column_name] = reports['domains']

                        yield line
        except Exception, e:
            log_to_postgres(e)
Example #32
    def execute(self, quals, columns):
        intrusion_set_list = []
        conn_string = _conn_string
        query = "MATCH (a:ioc) WHERE a.type=\'ip\' AND a.value <> \'-\' AND a.value <> \'\' " \
                "AND a.value <> \'0.0.0.0\' AND a.value <> \'10.10.10.1\' RETURN DISTINCT a.value AS ip_value"

        try:
            conn = ag.connect(conn_string)
            cur = conn.cursor()

            cur.execute(query)
            while True:
                records = cur.fetchall()
                if not records:
                    break

                for i in range(0, len(records)):
                    line = dict()
                    indicator_ip = records[i][0]
                    obj = IPWhois(indicator_ip)
                    try:
                        res = obj.lookup_rdap(asn_methods=['whois', 'dns'])
                    except Exception:
                        continue
                    for column_name in self.columns:
                        if (column_name == 'ip'):
                            line[column_name] = indicator_ip
                        elif (column_name == 'asn'):
                            line[column_name] = res['asn']
                        elif (column_name == 'country_code'):
                            line[column_name] = res['asn_country_code']
                        elif (column_name == 'date'):
                            line[column_name] = res['asn_date']
                        elif (column_name == 'description'):
                            line[column_name] = res['asn_description']
                        elif (column_name == 'registry'):
                            line[column_name] = res['asn_registry']
                    yield line
        except Exception, e:
            log_to_postgres(e)
Example #33
    def compute_columns_stat(self):
        result = list()

        session = self.driver.session()
        try:
            for column_name in self.columns:
                quals = [Qual(column_name, '=', 'WHATEVER')]
                query = 'EXPLAIN ' + self.make_cypher(quals, self.columns,
                                                      None)
                rs = session.run(query, {})
                explain_summary = rs.summary().plan[2]
                stats = explain_summary['EstimatedRows']

                log_to_postgres(
                    'Explain query for column ' + unicode(column_name) +
                    ' is : ' + unicode(query), DEBUG)
                log_to_postgres(
                    'Stat for column ' + unicode(column_name) + ' is : ' +
                    unicode(explain_summary['EstimatedRows']), DEBUG)

                result.append(((column_name, ), int(stats)))

        except CypherError:
            raise RuntimeError("Bad cypher query : " + query)
        finally:
            session.close()

        log_to_postgres('Columns stats are :' + unicode(result), DEBUG)
        return result
Example #34
    def __init__(self, fdw_options, fdw_columns):
        super(AIC_fdw, self).__init__(fdw_options, fdw_columns)

        self.MAX_RESULTS = 250

        self.LIKE_WILDCARD = '%'

        self.table_type = fdw_options.get('table_type', 'thing')

        self.region = fdw_options.get("region")
        if self.region is None:
            log_to_postgres("Please set the AWS region", ERROR)

        self.aws_access_key = fdw_options.get('aws_access_key')
        if self.aws_access_key is None:
            log_to_postgres("Please set the AWS access key", ERROR)

        self.aws_secret_key = fdw_options.get('aws_secret_key')
        if self.aws_secret_key is None:
            log_to_postgres("Please set the AWS secret key", ERROR)

        self.endpoint_url = fdw_options.get("url")

        self.columns = fdw_columns

        # self._row_id_column = fdw_columns.keys()[0]
        if 'thing' == self.table_type.lower():
            self._row_id_column = 'thing_name'
        elif 'thing-type' == self.table_type.lower():
            self._row_id_column = 'thing_type_name'
        elif 'thing-group' == self.table_type.lower():
            self._row_id_column = 'thing_group_name'
        else:
            log_to_postgres(
                "This FDW does not support table_type {}".format(
                    self.table_type), ERROR)

        client_args = {
            'aws_access_key_id': self.aws_access_key,
            'aws_secret_access_key': self.aws_secret_key,
            'region_name': self.region
        }

        if self.endpoint_url:
            client_args.update({'endpoint_url': self.endpoint_url})

        self.core_client = boto3.client('iot', **client_args)

        self.data_client = boto3.client('iot-data', **client_args)
Example #35
    def compute_table_stat(self):
        stats = 100000000
        if self.driver.supports_multi_db():
            session = self.driver.session(database=self.database)
        else:
            session = self.driver.session()
        try:
            rs = session.run('EXPLAIN ' + self.cypher, {})
            explain_summary = rs.consume().plan['args']
            stats = explain_summary['EstimatedRows']
            log_to_postgres(
                'Stat for table is ' + str(explain_summary['EstimatedRows']),
                DEBUG)
        except CypherSyntaxError:
            raise RuntimeError("Bad cypher query : " + self.cypher)
        except CypherTypeError:
            raise RuntimeError("Bad cypher type in query : " + self.cypher)
        finally:
            session.close()

        log_to_postgres('Table stat is :' + str(stats), DEBUG)
        return stats
Example #36
    def setClient(self):
        """
            Attempt to connect to BigQuery client
        """

        try:
            # Attempt connection
            bq = BqClient()
            bq.setClient()

            # Verbose log
            if self.verbose:
                log_to_postgres(
                    "Connection to BigQuery client with BqClient instance ID "
                    + str(id(bq)), INFO)

            # Add to pool
            self.client = bq

            return bq
        except RuntimeError:
            log_to_postgres("Connection to BigQuery client failed", ERROR)
Example #37
    def execute(self, quals, columns):
        intrusion_set_list = []
        conn_string = _conn_string
        query = "SELECT unnest(email_list),id,name FROM threat_miner_indicator"
        report_api = "http://www.threatcrowd.org/searchApi/v2/email/report"
        try:
            conn = ag.connect(conn_string)
            cur = conn.cursor()

            cur.execute(query)
            while True:
                records = cur.fetchall()
                if not records:
                    break

                for i in range(0, len(records)):
                    line = dict()
                    intrusion_set_id = records[i][1]
                    intrusion_set_name = records[i][2]
                    indicator_email = records[i][0]
                    reports = json.loads(
                        requests.get(report_api, {
                            "email": indicator_email
                        }).text)
                    if (reports['response_code'] == '1'):
                        for column_name in self.columns:
                            if (column_name == 'id'):
                                line[column_name] = intrusion_set_id
                            elif (column_name == 'name'):
                                line[column_name] = intrusion_set_name
                            elif (column_name == 'dtime'):
                                line[column_name] = '2999-12-31 00:00:00'
                            elif (column_name == 'email'):
                                line[column_name] = indicator_email
                            elif (column_name == 'exploit_domain'):
                                line[column_name] = reports['domains']
                        yield line
        except Exception, e:
            log_to_postgres(e)
Example #38
 def execute(self, quals, columns):
     s3 = boto3.client(
         's3',
         aws_access_key_id=self.access_key,
         aws_secret_access_key=self.secret_key,
         endpoint_url=self.endpoint,
         config=Config(signature_version='s3'),
     )
     log_to_postgres(message=str(quals), level=logging.WARNING)
     log_to_postgres(message=str(columns), level=logging.WARNING)
     obj_list = s3.list_objects(Bucket=self.bucket, Prefix=self.prefix)
     for c in obj_list['Contents']:
         res = s3.get_object(Bucket=self.bucket, Key=c['Key'])
         body = res['Body'].read()
         if self.store_as == 'gzip':
             body = zlib.decompress(body)
         for row in body.split('\n'):
             if self.format == 'json':
                 record = json.loads(row)
             else:
                 record = json.loads(row.split('\t')[2])
             yield record
Example #39
        def reader_lastools(file):
            log_to_postgres('read pc using lastools')
            attribute = 'xyzitcupRGB'
            """
            x, y, z,
            a - scan angle,
            i - intensity,
            n - number of returns for given pulse,
            r - number of this return,
            c - classification,
            u - user data,
            p - point source ID,
            e - edge of flight line flag,
            d - direction of scan flag,
            red, green, blue
            """
            cmd = '/home/mutian/fdw/LAStools.o/bin/las2txt -i {} -parse {} -stdout'.format(
                file, attribute)

            for line in execute_lastools(cmd):
                yield line
Example #40
 def __init__(self, options, columns):
     super(pgFDW_mapd, self).__init__(options, columns)
     for attribute in connection_attrs.keys():
         if attribute not in options:
             log_to_postgres(
                 'The ' + attribute +
                 ' parameter is not defined; defaulting to: ' +
                 connection_attrs.get(attribute), WARNING)
             setattr(self, attribute, connection_attrs.get(attribute))
         else:
             setattr(self, attribute, options.get(attribute))
     self.connection = connect(user=self.user,
                               password=self.password,
                               host=self.host,
                               port=self.port,
                               dbname=self.dbname)
     self.limit = options.get('limit', 100000)
     self.query = options.get('query', None)
     if self.query is not None:
         self.table_name = "(" + options.get('query') + ")"
     else:
         self.table_name = options.get('table_name', None)
Example #41
    def setOptions(self, options):
        """
            Set table options at class level
        """

        # Set options at class scope
        self.dataset = options.get('fdw_dataset') or options.get('schema')
        self.table = options.get('fdw_table') or options.get('tablename')
        self.convertToTz = options.get('fdw_convert_tz')

        # Set verbose option
        self.setOptionVerbose(options.get('fdw_verbose'))

        # Set SQL dialect
        self.setOptionSqlDialect(options.get('fdw_sql_dialect'))

        # Set grouping option
        self.setOptionGroupBy(options.get('fdw_group'))

        # Set casting rules
        self.setOptionCasting(options.get('fdw_casting'))

        # Set what to do if imported table has too many columns
        self.tooManyColumns = options.get("fdw_colcount") or "error"
        if self.tooManyColumns not in ("error", "trim", "skip"):
            log_to_postgres(
                "fdw_colcount must be one of 'error', 'trim', 'skip', if provided",
                ERROR)
            self.tooManyColumns = "error"

        # Set what to do if imported table columns share a 63
        # character prefix
        self.sharedPrefix = options.get("fdw_colnames") or "error"
        if self.sharedPrefix not in ("error", "skip", "trim"):
            log_to_postgres(
                "fdw_colnames must be one of 'error', 'trim', 'skip', if provided",
                ERROR)
            self.sharedPrefix = "error"
Example #42
    def setOptionCasting(self, castingRules):
        """
            Conversion rules are received as a string, for example: '{"key": "FLOAT64", "datetime": "DATE"}'

            The string will be converted to a dict
        """

        if castingRules:
            # Cast string as a dict
            try:
                import ast
                self.castingRules = ast.literal_eval(castingRules)
            except Exception as e:
                log_to_postgres(
                    "fdw_casting conversion failed: `" + str(e) + "`", ERROR)

            # For security reasons, ensure that the string was correctly cast to a dict
            if not isinstance(self.castingRules, dict):
                log_to_postgres(
                    "fdw_casting conversion failed: `fdw_casting format is incorrect.`",
                    ERROR)
Example #43
    def __init__(self, options, columns):
        super(SubscriberFDW, self).__init__(options, columns)
        self.endpoint_url = 'http://api.moosend.com/v3/'
        self.api_key = options.get('api_key', None)
        self.list_id = options.get('list_id', None)
        self.primary_key_column_name = 'Email'
        self.page_size = options.get('page_size', 500)

        if self.api_key is None:
            log_to_postgres(
                "MoosendFDW: You must supply an API key to Moosend in the options.",
                ERROR)

        if self.list_id is None:
            log_to_postgres(
                "MoosendFDW: You must supply a mailing list ID in the options.",
                ERROR)

        self.columns = columns
        self.main_fields = ("ID", "Name", "Email", "CreatedOn",
                            "UnsubscribedOn", "UnsubscribedFromID",
                            "SubscribeType", "SubscribeMethod")
        self.custom_fields = [c for c in columns if c not in self.main_fields]
Example #44
    def commit(self):
        if len(self.buffer) == 0:
            return

        s3 = boto3.client(
            's3',
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            endpoint_url=self.endpoint,
            config=Config(signature_version='s3'),
        )
        s3.create_bucket(
            Bucket=self.bucket,
        )
        if self.format == 'json':
            payload = '\n'.join(map(lambda row: json.dumps(row), self.buffer))
            log_to_postgres(message=payload, level=logging.WARNING)
            key = self.prefix + str(uuid.uuid4())
            s3.put_object(Bucket=self.bucket, Key=key, Body=payload)
            self.buffer = []
        else:
            raise NotImplementedError(
                "This FDW does not support the {} format".format(self.format))
Example #45
    def getOperator(self, operator):
        """
            Validate operator
        """

        # List of BigQuery operators supported
        # Exhaustive list: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#operators
        # Non listed operators may or may not work
        operators = ['=', '<', '>', '<=', '>=', '!=', '<>', 'LIKE', 'NOT LIKE']

        # Mapping between multicorn operators and BigQuery operators
        mapping = {}
        mapping['~~'] = 'LIKE'
        mapping['!~~'] = 'NOT LIKE'

        if operator in operators:  # Operator is natively supported
            return operator
        elif operator in mapping:  # Multicorn operator has a BigQuery equivalent
            return mapping[operator]
        else:  # Operator is not supported
            log_to_postgres(
                "Operator `" + operator + "` is not currently supported",
                ERROR)
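Traced through the branches above, a few illustrative calls:

        # self.getOperator('=')   -> '='         (natively supported)
        # self.getOperator('~~')  -> 'LIKE'      (mapped)
        # self.getOperator('@>')  -> logs ERROR  (not supported)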
Example #46
    def execute(self, quals, columns, sortkeys=None):

        log_to_postgres('Query Columns:  %s' % columns, DEBUG)
        log_to_postgres('Query Filters:  %s' % quals, DEBUG)

        statement = self.make_cypher(quals, columns, sortkeys)

        params = {}
        for qual in quals:
            params[str(qual.field_name)] = self.convert_to_neo4j(
                self.columns[qual.field_name], qual.value)

        log_to_postgres('Neo4j query is : ' + str(statement), DEBUG)
        log_to_postgres('With params : ' + str(params), DEBUG)

        # Execute & retrieve neo4j data
        if self.driver.supports_multi_db():
            session = self.driver.session(database=self.database)
        else:
            session = self.driver.session()

        try:
            for record in session.run(statement, params):
                line = {}
                for column_name in columns:
                    # TODO: from neo4j type to pg types
                    line[column_name] = self.convert_to_pg(
                        self.columns[column_name], record[column_name])
                log_to_postgres("sending result item to PG : " + str(line),
                                DEBUG)
                yield line
        except CypherSyntaxError:
            raise RuntimeError("Bad cypher query : " + statement)
        except CypherTypeError:
            raise RuntimeError("Bad cypher type in query : " + statement)
        finally:
            session.close()
Example #47
    def __init__(self, fdw_options, fdw_columns):
        super(SystemFdw, self).__init__(fdw_options, fdw_columns)
        log_to_postgres('options: %s' % fdw_options, DEBUG)
        log_to_postgres('columns: %s' % fdw_columns, DEBUG)

        if 'filepath' in fdw_options:
            self.filepath = fdw_options["filepath"]
        else:
            log_to_postgres('filepath parameter is required.', ERROR)
Example #48
    def execute(self, quals, columns):
        log_to_postgres("[CSVGZ_FDW] %s %s" % (quals, columns), logging.DEBUG)
        log_to_postgres("[CSVGZ_FDW] %s" % (self.file_name), logging.DEBUG)

        with gzip.open(self.file_name, 'rt') as f:
            reader = csv.reader(f)
            try:
                for row in reader:
                    line = {}
                    i = 0
                    for column_name in self.columns:
                        if column_name in columns:
                            line[column_name] = row[i]
                        i = i + 1
                    yield line
            except OSError as e:
                log_to_postgres("[CSVGZ_FDW] OS Error (%s)" % (e),
                                logging.ERROR)
            except csv.Error as e:
                log_to_postgres("[CSVGZ_FDW] CSV error: %s" % e, logging.ERROR)
            except:
                log_to_postgres(
                    "[CSVGZ_FDW] Unexpected error: %s" % (sys.exc_info()[0]),
                    logging.ERROR)
Example #49
    def _get_column_stats(self, columns):
        column_stats = {}

        # Temp solutions
        # Get total number of rows
        #total_rows = self.model.objects_in(self.db).count()
        # Get average rows per value in column (total divided by number of unique values)
        #exprs = ['intDiv(%d, uniqCombined(%s)) as %s' % (total_rows, c, c) for c in columns]
        #sql = "SELECT %s FROM $db.`%s`" % (', '.join(exprs), self.table_name)
        #for row in self.db.select(sql):
        #    for c in columns:
        #        column_stats[c] = dict(average_rows=getattr(row, c), size=4)
        # Get average size per column. This may fail because data_uncompressed_bytes is a recent addition
        sql = "SELECT * FROM system.columns WHERE database='%s' AND table='%s'" % (
            self.db_name, self.table_name)
        for col_def in self.db.select(sql):
            column_stats[col_def.name] = dict(average_rows=100000,
                                              size=self._calc_col_size(
                                                  col_def, 100000))

        # Debug
        for c in columns:
            log_to_postgres(c + ': ' + repr(column_stats[c]))
        return column_stats
Example #50
# Requires: from contextlib import contextmanager
@contextmanager
def report_errors(table_name: str):
    """Context manager that ignores exceptions and serializes them to JSON using PG's notice
    mechanism instead. The data source is meant to load these to report on partial failures
    (e.g. failed to load one table, but not others)."""
    try:
        yield
    except Exception as e:
        logging.error(
            "Error scanning %s, ignoring: %s: %s",
            table_name,
            get_exception_name(e),
            e,
            exc_info=e,
        )
        log_to_postgres(
            "SPLITGRAPH: "
            + json.dumps(
                {
                    "table_name": table_name,
                    "error": get_exception_name(e),
                    "error_text": str(e),
                }
            )
        )
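A usage sketch (scan_table is a hypothetical callable):

# with report_errors("my_table"):
#     scan_table("my_table")  # an exception here is logged and emitted as a
#                             # JSON notice, and execution continues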
Example #51
    def __init__(self, options, columns):
        super(FakerForeignDataWrapper, self).__init__(options, columns)
        log_to_postgres("call __init__()", DEBUG)

        self.limit = 100
        self.seed = None
        self.locale = 'en_US'
        self.columns = {}

        if 'max_results' in options:
            self.limit = int(options['max_results'])

        if 'locale' in options:
            self.locale = options['locale']

        if 'seed' in options:
            self.seed = int(options['seed'])

        faker = Faker(self.locale)
        faker.seed(self.seed)

        for column in columns:
            func = getattr(faker, column, lambda: None)
            self.columns[column] = func
Example #52
    def _matches(self, quals, row):
        """ Returns True if row matches to all quals. Otherwise returns False.

        Args:
            quals (list of Qual):
            row (list or tuple):

        Returns:
            bool: True if row matches to all quals, False otherwise.
        """
        for qual in quals:
            op = QUAL_OPERATOR_MAP.get(qual.operator)
            if op is None:
                log_to_postgres(
                    'Unknown operator {} in the {} qual. Row will be returned.'
                    .format(qual.operator, qual),
                    WARNING,
                    hint='Implement that operator in the ambryfdw wrapper.')
                continue

            elem_index = self.columns.index(qual.field_name)
            if not op(row[elem_index], qual.value):
                return False
        return True
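QUAL_OPERATOR_MAP itself is not shown in this listing; a minimal sketch covering the common comparison operators:

import operator

QUAL_OPERATOR_MAP = {
    '=': operator.eq,
    '<': operator.lt,
    '>': operator.gt,
    '<=': operator.le,
    '>=': operator.ge,
    '<>': operator.ne,
}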
Example #53
	def delete(self, rowid):
		log_to_postgres('Delete Request - rowid:  %s' % rowid, DEBUG)

		if not rowid:
			log_to_postgres('Update request requires rowid (PK).', ERROR)

		try:
			if self.type == 'ContentType':
				self.client.delete_content_type(rowid)
			elif self.type == 'Entry':
				self.client.delete_entry(rowid)
			elif self.type == 'Asset':
				self.client.delete_asset(rowid)
			else:
				log_to_postgres('Deleting entities of type %s is not supported yet.' % self.type, ERROR)
		except invalidContentfulResponse as e:
			log_to_postgres(e, ERROR)

		return {}
Example #54
    def insert(self, new_values):
        """
        This method is invoked every time a INSERT is executed
        on the foreign table.
        """
        log_to_postgres('Barman FDW INSERT output:  %s' % new_values)
        backup_cmd = "barman backup %s" % new_values['server']
        ssh_cmd = "%s@%s" % (self.barman_user,
                             self.barman_host)
        log_to_postgres('Barman FDW INSERT output:  %s' % backup_cmd)
        ssh = subprocess.Popen(["ssh", '-A', "%s" % ssh_cmd,
                                backup_cmd],
                               shell=False,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        output = ssh.communicate()

        log_to_postgres('Barman FDW INSERT output:  %s' % output[0])
        log_to_postgres('Barman FDW INSERT errors:  %s' % output[1])
        return new_values
Example #55
    def execute(self, quals, columns):
        log_to_postgres('exec quals: %s' % quals, DEBUG)
        log_to_postgres('exec columns: %s' % columns, DEBUG)

        resp = {}
        try:
            resp = self._client.metadata(self._path, **self._metadata_params)
        except Exception as e:
            log_to_postgres(
                'could not get metadata for path "%s": %s' %
                (self._path, str(e)), ERROR)

        for item in resp.get('contents', []):
            yield item
Example #56
    def execute(self, quals, columns):

        log_to_postgres('Query Columns:  %s' % columns, DEBUG)
        log_to_postgres('Query Filters:  %s' % quals, DEBUG)

        (client, bucket) = self.connect()
        filters = []
        for qual in quals:
            try:
                operator_function = get_operator_function(qual.operator)
            except UnknownOperatorException, e:
                log_to_postgres(e, ERROR)
            # Bind the qual and operator now: a late-binding closure would
            # make every filter use the values from the last loop iteration
            filters.append(
                lambda x, op=operator_function, q=qual:
                    op(x[q.field_name], q.value))
Example #57
    def execute(self, quals, columns):
        log_to_postgres('exec quals: %s' % quals, DEBUG)
        log_to_postgres('exec columns: %s' % columns, DEBUG)

        for coll_name in self._db.collection_names():
            try:
                res_rows = self._db.command({'collStats': coll_name})
                utils.convertNestedItemsToJson(res_rows)

                yield res_rows
            except Exception as e:
                log_to_postgres(
                    'could not get stats for coll [%s]: %s' %
                    (coll_name, str(e)), ERROR)
Example #58
 def __init__(self, options, columns):
     super(HiveServer2ForeignDataWrapper, self).__init__(options, columns)
     if 'host' not in options:
         log_to_postgres('The host parameter was not set; using the default "localhost".', WARNING)
     self.host = options.get("host", "localhost")

     if 'port' not in options:
         log_to_postgres('The port parameter was not set; using the default 10000.', WARNING)
     self.port = options.get("port", "10000")

     if 'table' not in options and 'query' not in options:
         log_to_postgres('table or query parameter is required.', ERROR)
     self.table = options.get("table", None)
     self.query = options.get("query", None)
         
     self.columns = columns
Example #59
 def __init__(self, options, columns):
     super(FilesystemFdw, self).__init__(options, columns)
     root_dir = options.get('root_dir')
     pattern = options.get('pattern')
     self.content_column = options.get('content_column', None)
     self.filename_column = options.get('filename_column', None)
     self.file_mode = int(options.get('file_mode', '700'), 8)
     self.structured_directory = StructuredDirectory(
         root_dir, pattern, file_mode=self.file_mode)
     self.folder_columns = [
         key[0] for key in self.structured_directory._path_parts_properties
         if key
     ]
     # Keep a set of files that should not be seen inside the transaction,
     # because they have "logically" been deleted, but are not yet committed
     self.invisible_files = set()
     # Keep a dictionary of updated content.
     self.updated_content = dict()
     # Assume 100 entries per folder at each level of the pattern
     self.total_files = 100**len(pattern.split('/'))
     if self.filename_column:
         if self.filename_column not in columns:
             log_to_postgres(
                 "The filename column (%s) does not exist "
                 "in the column list" % self.filename_column, ERROR,
                 "You should try to create your table with an "
                 "additional column: \n"
                 "%s character varying" % self.filename_column)
         else:
             columns.pop(self.filename_column)
     if self.content_column:
         if self.content_column not in columns:
             log_to_postgres(
                 "The content column (%s) does not exist "
                 "in the column list" % self.content_column, ERROR,
                 "You should try to create your table with an "
                 "additional column: \n"
                 "%s bytea" % self.content_column)
         else:
             columns.pop(self.content_column)
     if len(self.structured_directory.properties) < len(columns):
         missing_columns = set(columns.keys()).difference(
             self.structured_directory.properties)
         log_to_postgres("Some columns are not mapped in the structured fs",
                         level=WARNING,
                         hint="Remove the following columns: %s " %
                         missing_columns)
Example #60
    def execute(self, quals, columns):

        log_to_postgres('Query Columns:  %s' % columns, DEBUG)
        log_to_postgres('Query Filters:  %s' % quals, DEBUG)

        myQuery = r.table(self.table)\
                   .pluck(self.columns.keys())

        for qual in quals:

            try:
                operatorFunction = getOperatorFunction(qual.operator)
            except unknownOperatorException, e:
                log_to_postgres(e, ERROR)

            myQuery = myQuery.filter(
                operatorFunction(r.row[qual.field_name], qual.value))