Exemple #1
0
 def connect_database(self):
     """Open and return a new pymapd connection built from ``self.configuration``.

     The configuration mapping must provide the keys ``user``, ``password``,
     ``host``, ``port`` and ``database``.
     """
     cfg = self.configuration
     return pymapd.connect(
         user=cfg['user'],
         password=cfg['password'],
         host=cfg['host'],
         port=cfg['port'],
         dbname=cfg['database'],
     )
Exemple #2
0
    def __init__(
        self,
        uri=None,
        user=None,
        password=None,
        host=None,
        port=9091,
        database=None,
        protocol='binary',
        execution_type=EXECUTION_TYPE_CURSOR,
    ):
        """
        Initialize the client and open its pymapd connection.

        Parameters
        ----------
        uri : str
        user : str
        password : str
        host : str
        port : int, default 9091
        database : str
        protocol : {'binary', 'http', 'https'}
        execution_type : {
          EXECUTION_TYPE_ICP, EXECUTION_TYPE_ICP_GPU, EXECUTION_TYPE_CURSOR
        }

        Raises
        ------
        ValueError
            If `execution_type` is not one of the supported constants.
        """
        self.uri = uri
        self.user = user
        self.password = password
        self.host = host
        self.port = port
        self.db_name = database
        self.protocol = protocol

        if execution_type not in (
            EXECUTION_TYPE_ICP,
            EXECUTION_TYPE_ICP_GPU,
            EXECUTION_TYPE_CURSOR,
        ):
            # ValueError is the idiomatic type for a bad argument value.
            # It is a subclass of Exception, so callers catching the old
            # generic Exception still work.
            raise ValueError('Execution type defined not available.')

        self.execution_type = execution_type

        # Open the connection last, so no socket is leaked when validation
        # above fails.
        self.con = pymapd.connect(
            uri=uri,
            user=user,
            password=password,
            host=host,
            port=port,
            dbname=database,
            protocol=protocol,
        )
Exemple #3
0
 def set_database(self, name):
     """Point the client at database *name*, reconnecting only when needed.

     A ``None`` name or the currently selected database is a no-op; otherwise
     the existing connection is closed and a fresh one is opened against the
     requested database.
     """
     if name is not None and self.db_name != name:
         self.con.close()
         self.con = pymapd.connect(
             uri=self.uri,
             user=self.user,
             password=self.password,
             host=self.host,
             port=self.port,
             dbname=name,
             protocol=self.protocol,
         )
         self.db_name = name
Exemple #4
0
def mapd(schema, tables, data_directory, **params):
    """Recreate the target MapD database, run its DDL, and bulk-load tables.

    Parameters
    ----------
    schema : file-like
        Open handle whose contents are ``;``-separated DDL statements.
    tables : iterable
        Table identifiers forwarded to ``read_tables``.
    data_directory : str or Path
        Directory holding the table data files.
    **params
        Connection settings: ``host``, ``user``, ``password``, ``port``,
        ``database``.
    """
    import pymapd

    data_directory = Path(data_directory)
    # Column names colliding with these keywords get a trailing underscore.
    reserved_words = ['table', 'year', 'month']

    # connection
    logger.info('Initializing MapD...')
    # BUG FIX: `database` used to be bound only inside the `if` branch below,
    # so the second pymapd.connect(dbname=database) raised NameError whenever
    # params['database'] == 'mapd'.
    database = params['database']
    if database != 'mapd':
        # Recreate the target database while connected to the default 'mapd'.
        conn = pymapd.connect(
            host=params['host'],
            user=params['user'],
            password=params['password'],
            port=params['port'],
            dbname='mapd',
        )
        stmt = 'DROP DATABASE {}'.format(database)
        try:
            conn.execute(stmt)
        except Exception:
            # Dropping a database that does not exist yet is expected on the
            # first run; warn instead of failing.
            logger.warning('MapD DDL statement %r failed', stmt)

        stmt = 'CREATE DATABASE {}'.format(database)
        try:
            conn.execute(stmt)
        except Exception:
            logger.exception('MapD DDL statement %r failed', stmt)
        conn.close()

    conn = pymapd.connect(
        host=params['host'],
        user=params['user'],
        password=params['password'],
        port=params['port'],
        dbname=database,
    )

    # create tables
    for stmt in filter(None, map(str.strip, schema.read().split(';'))):
        try:
            conn.execute(stmt)
        except Exception:
            logger.exception('MapD DDL statement \n%r\n failed', stmt)

    # import data
    for table, df in read_tables(tables, data_directory):
        if table == 'batting':
            # float nan problem
            cols = df.select_dtypes([float]).columns
            df[cols] = df[cols].fillna(0).astype(int)

            # string None driver problem
            cols = df.select_dtypes([object]).columns
            df[cols] = df[cols].fillna('')
        elif table == 'awards_players':
            # string None driver problem
            cols = df.select_dtypes([object]).columns
            df[cols] = df[cols].fillna('')

        # rename fields: spaces/colons become underscores, and reserved
        # words get a trailing underscore, keeping column names DDL-safe.
        for df_col in df.columns:
            if ' ' in df_col or ':' in df_col:
                column = df_col.replace(' ', '_').replace(':', '_')
            elif df_col in reserved_words:
                column = '{}_'.format(df_col)
            else:
                continue
            df.rename(columns={df_col: column}, inplace=True)

        # load geospatial data
        if table == 'geo':
            # Geometry columns go through the row-wise loader.
            conn.load_table_rowwise(
                table, list(df.itertuples(index=False, name=None))
            )
        else:
            conn.load_table_columnar(table, df)

    conn.close()
 def test_connect_uri_and_others_raises(self):
     """Passing discrete credentials alongside a URI must raise TypeError."""
     uri = (
         'omnisci://*****:*****@localhost:6274/omnisci?'
         'protocol=binary'
     )
     with pytest.raises(TypeError):
         connect(username='******', uri=uri)
Exemple #6
0
    def __init__(
        self,
        uri: Optional[str] = None,
        user: Optional[str] = None,
        password: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = 6274,
        database: Optional[str] = None,
        protocol: str = 'binary',
        session_id: Optional[str] = None,
        ipc: Optional[bool] = None,
        gpu_device: Optional[int] = None,
    ):
        """
        Initialize OmniSciDB Client.

        Parameters
        ----------
        uri : str, optional
        user : str, optional
        password : str, optional
        host : str, optional
        port : int, default 6274
        database : str, optional
        protocol : {'binary', 'http', 'https'}, default 'binary'
        session_id : str, optional
            Attach to an existing server session instead of logging in.
        ipc : bool, optional, default None
            Enable Inter Process Communication (IPC) execution type.
            Defaults to False when `gpu_device` is None, otherwise True.
        gpu_device : int, optional, default None
            GPU Device ID.

        Raises
        ------
        Exception
            if the given execution_type is not valid.
        PyMapDVersionError
            if session_id is given but pymapd version is less or equal to 0.12
        """
        self.uri = uri
        self.user = user
        self.password = password
        self.host = host
        self.port = port
        self.db_name = database
        self.protocol = protocol
        self.session_id = session_id

        # Validates the ipc/gpu_device combination before anything connects.
        self._check_execution_type(ipc=ipc, gpu_device=gpu_device)

        self.ipc = ipc
        self.gpu_device = gpu_device

        # Keyword arguments shared by both connection flavours below.
        conn_kwargs = dict(uri=uri, host=host, port=port, protocol=protocol)

        if session_id:
            if self.version < pkg_resources.parse_version('0.12.0'):
                raise PyMapDVersionError(
                    'Must have pymapd > 0.12 to use session ID'
                )
            # Session login: credentials and database come from the session.
            self.con = pymapd.connect(sessionid=session_id, **conn_kwargs)
        else:
            self.con = pymapd.connect(
                user=user,
                password=password,
                dbname=database,
                **conn_kwargs,
            )
Exemple #7
0
 def test_connect_uri_and_others_raises(self):
     """Passing discrete credentials alongside a URI must raise TypeError."""
     uri = (
         'mapd://*****:*****@localhost:6274/mapd?protocol='
         'binary'
     )
     with pytest.raises(TypeError):
         connect(username='******', uri=uri)
Exemple #8
0
# Script: read National Water Model (NWM) terrain NetCDF files for a range of
# dates and flatten each dataset into a pandas DataFrame.
# NOTE(review): the `for datels` loop body is truncated in this excerpt — the
# DataFrame built at the end of each iteration is presumably loaded into the
# `conn` connection further down; confirm against the full file.
import xarray as xr
import gc
from pymapd import connect
import pyarrow as pa
import numpy as np
import pandas as pd
from pyproj import Proj, transform
# Open the MapD connection up front (credentials are masked in this sample).
conn = connect(user="******",
               password="******",
               host="localhost",
               dbname="mapd")
import mzgeohash
import unicodedata
from datetime import datetime
model_name = 'analysis_assim'
# Dates to ingest, formatted YYYYMMDD (first ten days of January 2018).
date = [
    '20180101', '20180102', '20180103', '20180104', '20180105', '20180106',
    '20180107', '20180108', '20180109', '20180110'
]
#date=['20180101']
# Single model cycle; the commented alternative below processed four cycles.
time = 't00'
#time=['t00','t06','t12','t18']
for datels in date:
    # Build the per-date NetCDF path, e.g.
    # /raidStorage/nwm_data/analysis_assim/20180101/t00/nwm.t00z....conus.nc
    file_prefix = time + 'z'
    filename = 'nwm.' + file_prefix + '.analysis_assim.terrain_rt.tm00.conus.nc'
    path = '/raidStorage/nwm_data/' + model_name + '/' + datels + '/' + time
    file = path + '/' + filename
    print(file)
    # Flatten the gridded dataset; reset_index() turns the coordinate
    # MultiIndex into ordinary columns.
    ds = xr.open_dataset(file)
    df = ds.to_dataframe()
    df = df.reset_index()
Exemple #9
0
import pandas as pd
from pymapd import connect

# HTTPS connection to the remote MEIT OmniSci instance (credentials masked).
con = connect(
    user="******",
    password="******",
    protocol="https",
    host="ec-meit.ca",
    port="/api",
    dbname="meit",
)

# mapmeit
# Top-50 NOx emitters grouped by the mapmeit key, exported to CSV.
sql = "SELECT mapmeit as key0,SUM(nox*1) AS nox FROM DB_2015 WHERE nox IS NOT NULL GROUP BY key0 ORDER BY nox DESC LIMIT 50"
pd.read_sql(sql, con).to_csv("example.csv", index=False)
Exemple #10
0
def con(mapd_server):
    """Pytest fixture: binary-protocol connection to the local test server.

    `mapd_server` is only a fixture dependency that guarantees the server is
    up; its value is not used here.
    """
    return connect(
        user="******",
        password='******',
        host='localhost',
        port=6274,
        protocol='binary',
        dbname='mapd',
    )
def main(argv):
    """Download Google Analytics data for one profile view and load it into
    OmniSci, either directly via pymapd or manually through Immerse.

    When argv is empty everything is prompted for interactively; otherwise
    argv is: key file path, profile name, OmniSci URL, begin date, end date.
    """

    if len(argv) == 0:
        # Interactive mode: all settings are asked for below.
        key_file_location = './client_secrets.json'
        selected_profile = None
        omnisci_url = None
        date_ranges = None
    else:
        key_file_location = argv[0]
        selected_profile = argv[1]
        omnisci_url = argv[2]
        date_ranges = [(argv[3], argv[4])]

        # Sanity check: show which omnisci-prefixed tables already exist.
        with pymapd.connect(omnisci_url) as con:
            print('existing tables: ',
                  [x for x in con.get_tables() if x.startswith('omnisci')])

    service = get_service(key_file_location)

    # Construct dictionary of GA website name and ids.
    profile_ids = traverse_hierarchy(service)

    # Select the GA profile view to extract data
    # selection_list[0] is a dummy so menu item numbers start at 1.
    selection_list = [0]
    i = 1
    print('%5s %20s %5s %20s' % ("Item#", "View ID", " ", "View Name"))
    for profile in sorted(profile_ids):
        selection_list = selection_list + [profile_ids[profile]]
        print('%4s %20s %5s %20s' % (i, profile_ids[profile], " ", profile))
        i += 1

    if not selected_profile:
        print(
            'Enter the item# of the view you would like to ingest into MapD: ')
        item = int(input())
        # NOTE(review): `item` is already an int here, so the `item == ''`
        # branch can never be true; int('') would have raised ValueError on
        # the previous line instead.
        if item == '' or item <= 0 or item >= len(selection_list):
            print('Invalid selection - %s' % item)
            sys.exit(0)
        print('Item # %s selected' % item)
    else:
        item = selection_list.index(profile_ids[selected_profile])

    if not date_ranges:
        print(
            '\nEnter the begin date and end date in the following format: YYYY-MM-DD YYYY-MM-DD'
        )
        print(
            'Or hit enter to proceed with the default which is last 30 days data'
        )
        print('Date Range: ')
        begin_end_date = input()
        if begin_end_date == '':
            # NOTE(review): the message promises 30 days but the active
            # default below is 2 days — confirm which is intended.
            print('Extract data from today to 30 days ago')

            # date_ranges = [('2017-08-27', '2018-02-22')]
            # date_ranges = [('30daysAgo', 'today')]
            date_ranges = [('2daysAgo', 'today')]

        else:
            # Split "YYYY-MM-DD YYYY-MM-DD" into the two date strings.
            (begin_date, end_date) = [
                t(s) for t, s in zip((str, str), begin_end_date.split())
            ]
            print('Extract data from %s to %s' % (begin_date, end_date))
            date_ranges = [(begin_date, end_date)]

    if not omnisci_url:
        print(
            "\nEnter the OmniSci server URL if you want to upload data,\n otherwise simply hit enter to use the manual procedure to upload the data"
        )
        print(
            "  URL example: - omnisci://admin:[email protected]:6274/omnisci?protocol=binary"
        )
        print('OmniSci URL: ')
        omnisci_url = input()
        if omnisci_url == '':
            print(
                'Use MapD Immerse import user interface to load the output CSV file'
            )
            omnisci_url = None
        print("")

    # Download the analytics data for the selected view into CSV files.
    csv_list = []
    for profile in sorted(profile_ids):
        if (selection_list[item] == profile_ids[profile]):
            print('\nGoing to download data for %s (%s) ...' %
                  (profile, profile_ids[profile]))
            table_name = profile.lower()
            table_name = '%s' % (table_name.replace(' ', ''))
            final_csv_file = './data/%s.csv' % (table_name)
            final_csv_gzfile = './data/%s.csv.gz' % (table_name)
            csv_list = build_csv_list(service, profile_ids[profile], profile,
                                      date_ranges, csv_list)
            merge_tables(final_csv_file, csv_list)
    print("Download of analytics data done.")

    # TODO Lines below need to be inside the for loop above?
    # NOTE(review): if the loop above never matches, table_name,
    # final_csv_file and final_csv_gzfile are unbound here and the code
    # below raises NameError — confirm a match is guaranteed.

    # Gzip the CSV file
    if os.path.isfile(final_csv_gzfile):
        os.remove(final_csv_gzfile)
    with open(final_csv_file, 'rb') as f_in, gzip.open(final_csv_gzfile,
                                                       'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

    # Connect to MapD
    if not omnisci_url or omnisci_url == '':
        print(
            "======================================================================="
        )
        print('Goto OmniSci Immerse UI and import the CSV file %s' %
              (final_csv_gzfile))
        print(
            "======================================================================="
        )

    else:
        # Direct upload path: load the gzipped CSV through pymapd.
        with pymapd.connect(omnisci_url) as con:
            load_table_mapd(con, table_name, final_csv_gzfile)

        print(
            "======================================================================="
        )
        print('Goto OmniSci Immerse UI')
        print(
            "======================================================================="
        )
Exemple #12
0
# Chapter10_2.py
# Using pymapd and the mapd database to create a geospatial table
# 20180215

import time

import pymapd
from pymapd import connect

# Connect to the public MapD community server (credentials masked).
connection = connect(user="******",
                     password="******",
                     host="community.mapd.com",
                     dbname="mapd")

cursor = connection.cursor()

# NOTE(review): DROP says COUNTY while CREATE says county — presumably the
# server treats identifiers case-insensitively; confirm.
DROP = "DROP TABLE COUNTY;"
create = """CREATE TABLE county ( id integer NOT NULL, 
	name VARCHAR(50), statefips VARCHAR(3), 
	stpostal VARCHAR(3), geom Polygon );
"""

print(time.time())
try:
    cursor.execute(DROP)
except Exception:
    # Best-effort drop: the table may not exist on a first run.
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; narrowed to Exception.
    pass
cursor.execute(create)
connection.commit()
print(time.time())
Exemple #13
0
import pymapd as py
import matplotlib.pyplot as plt

# SECURITY(review): API credentials are hard-coded below; they should be
# loaded from the environment or a config file, and these should be revoked.
user_str = 'I97D12D9D7268474D874'
password_str = 'quzPy63R6kAG4BTeQ0Y5DxnsO3sZl7pehv6u673v'
host_str = 'use2-api.mapd.cloud'
dbname_str = 'mapd'

# HTTPS connection to the MapD Cloud API endpoint.
connection = py.connect(
    user=user_str,
    password=password_str,
    host=host_str,
    dbname=dbname_str,
    port=443,
    protocol='https',
)

# Fetch a small sample of movement ids and plot them.
query = "SELECT movement_id FROM san_francisco_taz_26 LIMIT 100"
cursor = connection.execute(query)
rows = cursor.fetchall()

plt.plot(rows)
plt.show()
# Adapted from the sample in the pymapd documentation:
# https://pymapd.readthedocs.io/en/latest/usage.html
# Some parts by Tsubasa Kato (@_stingraze on Twitter)
from pymapd import connect
import pandas as pd

# Local OmniSci instance (credentials masked in this sample).
con = connect(
    user="******",
    password="******",
    host="localhost",
    dbname="omnisci",
)

# Pull 100 sample rows into a DataFrame and dump them as text.
df = pd.read_sql("SELECT * from flights_2008_7M limit 100", con)

print(df.to_string())
Exemple #15
0
    "-commit",
    default="1234567890123456789012345678901234567890",
    help="Commit hash to use to record this benchmark results",
)

args = parser.parse_args()

# Validate CLI arguments before opening any connections.
if args.df <= 0:
    print("Bad number of data files specified", args.df)
    sys.exit(1)

if args.iterations < 1:
    # BUG FIX: the message previously printed args.t, an attribute the parser
    # never defines, so reaching this branch raised AttributeError.
    print("Bad number of iterations specified", args.iterations)
    # NOTE(review): unlike the check above, this branch does not sys.exit() —
    # confirm whether running with iterations < 1 is really intended.

con = connect(
    user="******", password="******", host="localhost", dbname="omnisci", port=args.port,
)

db_reporter = None
# BUG FIX: was `args.db_user is not ""` — identity comparison against a str
# literal is implementation-defined and emits SyntaxWarning; use equality.
if args.db_user != "":
    print("Connecting to database")
    # MySQL connection used only for reporting benchmark results.
    db = mysql.connector.connect(
        host=args.db_server,
        port=args.db_port,
        user=args.db_user,
        passwd=args.db_pass,
        db=args.db_name,
    )
    db_reporter = report.DbReport(
        db,
        args.db_table,
Exemple #16
0
# Script: load gzipped geotweet TSV exports into an OmniSci 'geotweets' table.
# NOTE(review): the per-file loop continues past this excerpt — the cleaned
# DataFrame is presumably loaded into `conn` further down; confirm against
# the full file.
import pandas as pd
from pymapd import connect
from os import listdir
import glob
from os.path import isfile, join
import pyarrow as pa;import numpy as np


path = '/n/holyscratch01/cga/dkakkar/data/geotweets/results/2020/output/' # use your path
all_files = glob.glob(path + "/*.gz")

print("Connecting to Omnisci")
conn=connect(user="******", password="******", host="localhost", port=7159, dbname="omnisci") #use your port number
print("Connected",conn)
#query="DROP TABLE IF EXISTS geotweets"
#coinn.execute(query)
# Files that fail to parse are collected here instead of aborting the run.
l_ni=[]
# Start from a clean table on every run.
conn.execute("DROP TABLE IF EXISTS geotweets;")

for filename in all_files:
    #print(filename)
    try:
      # NOTE(review): low_memory='true' passes a string where pandas expects
      # a bool (any non-empty string is truthy) — confirm low_memory=True was
      # meant.
      df = pd.read_csv(filename, sep='\t',dtype='unicode',index_col=None, low_memory='true',compression='gzip')
    except:
      # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
      # `except Exception:` would be safer while keeping the skip behavior.
      l_ni.append(filename)
      #print("Corrupt file",filename)
      continue
    # The 'geom' column is dropped before loading.
    df = df.drop(['geom'], axis = 'columns')
    #print(df.head())
    #df.columns=['GLOBALEVENTID','SQLDATE','MonthYear','Years','FractionDate','Actor1Code','Actor1Name','Actor1CountryCode','Actor1KnownGroupCode','Actor1EthnicCode','Actor1Religion1Code','Actor1Religion2Code','Actor1Type1Code','Actor1Type2Code','Actor1Type3Code','Actor2Code','Actor2Name','Actor2CountryCode','Actor2KnownGroupCode','Actor2EthnicCode','Actor2Religion1Code','Actor2Religion2Code','Actor2Type1Code','Actor2Type2Code','Actor2Type3Code','IsRootEvent','EventCode','EventBaseCode','EventRootCode','QuadClass','GoldsteinScale','NumMentions','NumSources','NumArticles','AvgTone','Actor1Geo_Type','Actor1Geo_FullName','Actor1Geo_CountryCode','Actor1Geo_ADM1Code','Actor1Geo_Lat','Actor1Geo_Long','Actor1Geo_FeatureID','Actor2Geo_Type','Actor2Geo_FullName','Actor2Geo_CountryCode','Actor2Geo_ADM1Code','Actor2Geo_Lat','Actor2Geo_Long','Actor2Geo_FeatureID','ActionGeo_Type','ActionGeo_FullName','ActionG]    #print(df.head(5))    #li.append(df)