Example #1
def list_blobs_with_prefix(bucket_name, prefix, delimiter=None):
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    blobs = bucket.list_blobs(prefix=prefix, delimiter=delimiter)
    return blobs
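
A minimal usage sketch for the helper above, with a placeholder bucket name and prefix: when a delimiter is passed, list_blobs behaves like a folder listing, and the "sub-directories" are collected on the iterator's prefixes attribute once the results have been consumed.

from google.cloud import storage

def print_top_level():
    # 'my-bucket' and 'logs/' are placeholder values for illustration
    blobs = list_blobs_with_prefix('my-bucket', prefix='logs/', delimiter='/')
    for blob in blobs:                # objects directly under logs/
        print(blob.name)
    for prefix in blobs.prefixes:     # "sub-folders" under logs/
        print(prefix)
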
Example #2
                        metavar='OUTPUTGCSFILE',
                        type=gcs_path_regex_validator,
                        nargs=1,
                        help='GCS path to output CSV file')
    parser.add_argument('--workflow-ids',
                        metavar='WORKFLOWIDS',
                        type=workflow_regex_validator,
                        nargs='+',
                        help='Workflow ids for performance comparison')

    args = parser.parse_args()
    set_log_verbosity(args.verbose)
    quieten_chatty_imports()
    logger.info("Starting Comparer operation.")

    credentials, project_id = google.auth.default()
    storage_client = storage.Client(credentials=credentials)
    input_gcs_bucket, input_gcs_path = args.digest_gcs_base_path[0]

    workflow_ids_and_jsons = read_digester_jsons_from_gcs(
        input_gcs_bucket, input_gcs_path, args.digester_version[0],
        args.workflow_ids, storage_client)
    comparison_result_df = compare_jsons(workflow_ids_and_jsons)
    result_csv_string = comparison_result_df.to_csv()

    output_gcs_bucket, output_gcs_path = args.output_gcs_file_path[0]
    upload_blob(output_gcs_bucket, result_csv_string, output_gcs_path,
                storage_client, logger)

    logger.info('Comparer operation completed successfully.')
def start_vm(project_id,
             zone,
             vm_username,
             vm_name,
             vm_header,
             firewall_rule_name,
             firewall_ip_range,
             serv_port,
             region,
             address_name,
             password,
             topic_name=None):
    setup_stage = SETUP_PROJECT
    firewall_tag = vm_header + '-restricted-jupyter'
    notebook_gs_bucket = vm_header + '-notebook-vm'
    machine_desc = 'Jupyter Notebook Server for ' + vm_username
    project_no = None
    ext_ip_address = None
    resp_code = 200
    result = {}
    try:
        compute = get_compute_resource()

        if setup_stage == SETUP_PROJECT:
            logger.debug('Validating Project ID ...')
            credentials = GoogleCredentials.get_application_default()
            service = build('cloudresourcemanager',
                            'v1',
                            credentials=credentials)
            response = service.projects().list(
                filter='projectId={}'.format(project_id)).execute()
            if 'projects' in response:
                project_no = response['projects'][0]['projectNumber']
                setup_stage = SETUP_FIREWALL
                logger.debug('Project ID has been validated')
            else:
                resp_code = 500
                result = {
                    'message':
                    'Unable to find project ID [{}]'.format(project_id)
                }

        if setup_stage == SETUP_FIREWALL:
            # check if firewall exists
            logger.debug('Setup a firewall ...')
            response = build_firewalls_request(
                compute=compute,
                method=FIREWALLS_LIST,
                project_id=project_id,
                firewall_rule_name=firewall_rule_name).execute()
            firewall_body = {
                'allowed': [{
                    'ports': [serv_port],
                    'IPProtocol': 'tcp'
                }],
                'targetTags': [firewall_tag],
                'sourceRanges': firewall_ip_range,
                'description': firewall_rule_name
            }
            if 'items' in response:
                logger.debug(
                    'Found existing firewall [{}].'.format(firewall_rule_name))
                # update
                resp_body = response['items'][0]
                need_fw_update = False

                for attr in firewall_body.keys():
                    if attr not in resp_body or resp_body[
                            attr] != firewall_body[attr]:
                        need_fw_update = True
                        break
                if need_fw_update:
                    logger.debug('Updating firewall [{}] properties.'.format(
                        firewall_rule_name))
                    response = build_firewalls_request(
                        compute=compute,
                        method=FIREWALLS_UPDATE,
                        project_id=project_id,
                        firewall_rule_name=firewall_rule_name,
                        firewall_body=firewall_body).execute()
            else:
                # create a new firewall rule
                logger.debug(
                    'Creating a new firewall [{}].'.format(firewall_rule_name))
                firewall_body['name'] = firewall_rule_name
                response = build_firewalls_request(
                    compute=compute,
                    method=FIREWALLS_CREATE,
                    project_id=project_id,
                    firewall_rule_name=firewall_rule_name,
                    firewall_body=firewall_body).execute()
                wait_for_operation(compute=compute,
                                   project=project_id,
                                   operation=response['name'])
            if 'error' in response:
                result = {
                    'message':
                    'There was an error setting up a firewall [{}]: {}'.format(
                        firewall_rule_name, response['error']['message'])
                }
                resp_code = response['error']['code']
            else:
                setup_stage = SETUP_EXTERNAL_IP

        if setup_stage == SETUP_EXTERNAL_IP:
            logger.debug('Setup External IP address')
            response = build_addresses_request(compute=compute,
                                               method=ADDRESSES_LIST,
                                               project_id=project_id,
                                               region=region,
                                               name=address_name).execute()
            if 'items' not in response:
                logger.debug('Creating a new external IP address')
                response = build_addresses_request(
                    compute=compute,
                    method=ADDRESSES_CREATE,
                    project_id=project_id,
                    region=region,
                    name=address_name).execute()
                wait_for_operation(compute=compute,
                                   project=project_id,
                                   operation=response['name'],
                                   region=region)

            if 'error' in response:
                result = {
                    'message':
                    'There was an error setting up an external IP address [{}]: {}'
                    .format(address_name, response['error']['message'])
                }
                resp_code = response['error']['code']
            else:
                setup_stage = SETUP_MONITOR

        if setup_stage == SETUP_MONITOR:
            logger.debug('Set monitoring service ...')
            credentials = GoogleCredentials.from_stream(
                settings.GOOGLE_APPLICATION_CREDENTIALS).create_scoped(
                    SERVICE_USAGE_SCOPES)
            http = credentials.authorize(httplib2.Http())
            su_service = build('serviceusage',
                               'v1',
                               http=http,
                               cache_discovery=False)

            response = su_service.services().list(
                parent='projects/{}'.format(project_no),
                filter='state:ENABLED',
                fields='services/config/name').execute()
            need_su_update = True
            for s in response['services']:
                if s['config']['name'] == NEED_API:
                    logger.debug('Monitoring service is already enabled.')
                    need_su_update = False
                    break
            if need_su_update:
                logger.debug('Enabling monitoring service ...')
                response = su_service.services().enable(
                    name='projects/{}/services/{}'.format(
                        project_no, NEED_API),
                    body={}).execute()

            if 'error' in response:
                result = {
                    'message':
                    'There was an error while setting the monitoring service [{}] : {}'
                    .format(NEED_API, response['error']['message'])
                }
                resp_code = response['error']['code']
            else:
                setup_stage = SETUP_FILES
        if setup_stage == SETUP_FILES:
            response = build_addresses_request(compute=compute,
                                               method=ADDRESSES_GET,
                                               project_id=project_id,
                                               region=region,
                                               name=address_name).execute()
            ext_ip_address = response['address']
            logger.debug('Setting up files ...')

            logger.debug('Find bucket {}'.format(notebook_gs_bucket))
            client = storage.Client()
            bucket = client.lookup_bucket(notebook_gs_bucket)

            if not bucket:
                logger.debug('Creating a new bucket {bucket_name}'.format(
                    bucket_name=notebook_gs_bucket))
                bucket = client.create_bucket(
                    bucket_or_name=notebook_gs_bucket, project=project_id)
            logger.debug('Upload files to bucket {bucket_name}'.format(
                bucket_name=notebook_gs_bucket))

            upload_blob_string(bucket, CERT_SUBJ + ext_ip_address,
                               CERT_SUBJ_FILENAME)
            hashpass = hash_it(password)
            upload_blob_string(bucket, hashpass, PASSHASH_FILENAME)
            env_vars_sh = "PROJECT={project_id}\n".format(
                project_id=project_id)
            env_vars_sh += "USER_NAME={vm_username}\n".format(
                vm_username=vm_username)
            env_vars_sh += "MACHINE_NAME={vm_name}\n".format(vm_name=vm_name)
            env_vars_sh += "SERV_PORT={serv_port}\n".format(
                serv_port=serv_port)
            upload_blob_string(bucket, env_vars_sh, ENV_VARS_SH_FILE)
            base_dir = os.path.dirname(os.path.dirname(__file__))
            upload_filenames = [
                CPU_LOGGER_FILE, IDLE_LOG_FILE, IDLE_LOG_SH_FILE,
                IDLE_SHUTDOWN_FILE, IDLE_SHUTDOWN_SH_FILE, INSTALL_SH_FILE
            ]
            for filename in upload_filenames:
                upload_blob_filename(
                    bucket, '{base_dir}/{sub_dir}/{filename}'.format(
                        base_dir=base_dir,
                        sub_dir=NOTEBOOK_VM_SHELL_DIR,
                        filename=filename), filename)
            setup_stage = SETUP_INSTANCE

        if setup_stage == SETUP_INSTANCE:
            logger.debug('Setting a VM instance ...')
            response = build_instances_request(compute=compute,
                                               method=INSTANCES_LIST,
                                               project_id=project_id,
                                               zone=zone,
                                               name=vm_name).execute()
            if 'items' in response:
                instance_settings = response['items'][0]
                logger.debug(
                    'Existing VM instance {} found. STATUS: {}'.format(
                        vm_name, instance_settings['status']))
                if instance_settings[
                        'status'] == 'TERMINATED':  # todo: handle other status as well and wait
                    logger.debug('Starting a VM instance ...')
                    response = build_instances_request(compute=compute,
                                                       method=INSTANCES_START,
                                                       project_id=project_id,
                                                       zone=zone,
                                                       name=vm_name).execute()
            else:
                logger.debug('Create and start up a new VM instance')
                instance_body = {
                    'name':
                    vm_name,
                    'machineType':
                    'zones/{zone}/machineTypes/{machine_type}'.format(
                        zone=zone, machine_type=MACHINE_TYPE),
                    'description':
                    machine_desc,
                    'disks': [{
                        'boot': True,
                        'autoDelete': True,
                        'initializeParams': {
                            'sourceImage':
                            'projects/debian-cloud/global/images/family/debian-9',
                            'diskSizeGb': DISK_SIZE
                        }
                    }],
                    'serviceAccounts': [{
                        'scopes': [
                            'https://www.googleapis.com/auth/bigquery',
                            'https://www.googleapis.com/auth/devstorage.read_write',
                            'https://www.googleapis.com/auth/monitoring'
                        ]
                    }],
                    'networkInterfaces': [{
                        'accessConfigs': [{
                            'natIP': ext_ip_address
                        }]
                    }],
                    'tags': {
                        'items': [firewall_tag]
                    },
                    'metadata': {
                        "items": [{
                            "key": "NOTEBOOK_GS_BUCKET",
                            "value": notebook_gs_bucket
                        }, {
                            "key":
                            "startup-script",
                            "value":
                            append_file_to_string(
                                '', '{base_dir}/{sub_dir}/{filename}'.format(
                                    base_dir=base_dir,
                                    sub_dir=NOTEBOOK_VM_SHELL_DIR,
                                    filename=STARTUP_SH_FILE))
                        }]
                    }
                }
                response = build_instances_request(
                    compute=compute,
                    method=INSTANCES_CREATE,
                    project_id=project_id,
                    zone=zone,
                    body=instance_body).execute()

            if 'name' in response:
                wait_for_operation(compute=compute,
                                   project=project_id,
                                   operation=response['name'],
                                   zone=zone)
                if topic_name:
                    time.sleep(120)  # give the instance time to finish booting (needs at least 90 seconds)
        # if setup_stage == SETUP_PUBSUB:
        #     logger.debug('Setup Pub/Sub ...')
        #     # delete topic if exists
        #     publisher = pubsub_v1.PublisherClient()
        #     topic_path = publisher.topic_path(project_id, topic_name)
        #     try:
        #         publisher.get_topic(topic_path)
        #     except GoogleAPICallError:
        #         logger.debug('Creating a Pub/Sub topic ...')
        #         publisher.create_topic(topic_path)

    except HttpError as e:
        content = json.loads(e.content.decode('utf-8'))
        reason = content['error']['message']
        resp_code = e.resp.status
        result = {
            'message':
            'There was an error while setting up for the {}: {}'.format(
                STAGE_TITLES[setup_stage], reason)
        }
        logger.error("[ERROR] " + result['message'])
        logger.exception(e)

    if resp_code == 200 and not result:
        result = {
            'message': 'Instance has started.',
            'ext_ip_address': ext_ip_address
        }
    result['resp_code'] = resp_code
    return result
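
start_vm leans on a wait_for_operation helper that is not shown above. A minimal sketch of what such a poller could look like with the googleapiclient Compute Engine API, covering the zonal, regional and global cases (the polling interval and the error handling are assumptions):

import time

def wait_for_operation(compute, project, operation, zone=None, region=None):
    # Sketch: poll a Compute Engine operation until its status is DONE.
    while True:
        if zone:
            result = compute.zoneOperations().get(
                project=project, zone=zone, operation=operation).execute()
        elif region:
            result = compute.regionOperations().get(
                project=project, region=region, operation=operation).execute()
        else:
            result = compute.globalOperations().get(
                project=project, operation=operation).execute()
        if result['status'] == 'DONE':
            return result   # callers can inspect result.get('error') themselves
        time.sleep(2)       # arbitrary polling interval
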
Example #4
def Ejecutar():

    # Python 2 only: force UTF-8 as the default string encoding
    reload(sys)
    sys.setdefaultencoding('utf8')
    storage_client = storage.Client()
    bucket = storage_client.get_bucket('ct-telefonia')
    gcs_path = 'gs://ct-telefonia'
    sub_path = KEY_REPORT + '/'
    output = gcs_path + "/" + sub_path + fecha + ext
    blob = bucket.blob(sub_path + fecha + ext)
    dateini = request.args.get('dateini')
    dateend = request.args.get('dateend')

    if dateini is None:
        dateini = GetDate1
    else:
        dateini = dateini + hour1

    if dateend is None:
        dateend = GetDate2
    else:
        dateend = dateend + hour2

    client = bigquery.Client()
    QUERY = (
        'SELECT servidor, operacion, token, ipdial_code, id_cliente, cartera FROM telefonia.parametros_ipdial where Estado = "Activado"'
    )  #WHERE ipdial_code = "intcob-unisabaneta"
    query_job = client.query(QUERY)
    rows = query_job.result()
    data = ""

    try:
        os.remove(ruta_completa)  # Delete the local file (aries)
    except OSError:
        print("Already deleted from aries")

    try:
        blob.delete()  # Delete the previous output from Cloud Storage
    except Exception:
        print("Already deleted from storage")

    try:
        ##QUERY2 = ('delete FROM `contento-bi.telefonia.chats` where date = ' + '"' + dateini[0:8] + '"')
        QUERY2 = (
            'delete FROM `contento-bi.telefonia.chats` where CAST(chat_date AS DATE) = '
            + '"' + dateini[0:4] + '-' + dateini[4:-8] + '-' + dateini[6:-6] +
            '"')
        query_job = client.query(QUERY2)
        rows2 = query_job.result()
    except Exception:
        print("Already deleted from bigquery")

    file = open(ruta_completa, "a")
    for row in rows:
        url = 'http://' + str(
            row.servidor
        ) + '/ipdialbox/api_reports.php?token=' + row.token + '&report=' + str(
            CODE_REPORT) + '&date_ini=' + dateini + '&date_end=' + dateend
        datos = requests.get(url).content

        # print(url)

        if len(datos) < 50:  # reuse the response downloaded above instead of requesting again
            continue
        else:
            i = json.loads(datos)
            for rown in i:
                # Build one pipe-delimited line per record, stripping newlines/CRs from every field
                campos = [
                    rown["chat_id"], rown["channel"], rown["chat_date"],
                    rown["user_name"], rown["user_email"], rown["user_phone"],
                    rown["user_chat_chars"], rown["agent_id"], rown["agent_name"],
                    rown["agent_chat_chars"], rown["chat_duration"], rown["cod_act"],
                    rown["comment"], rown["id_customer"], rown["agent_skill"],
                    rown["user_id"], row.id_cliente, row.cartera
                ]
                linea = "|".join(
                    str(campo).replace('\n', ' ').replace('\r', '')
                    for campo in campos)
                file.write(linea + "\n")

    file.close()
    blob.upload_from_filename(ruta_completa)
    time.sleep(10)
    ejecutar = chats_beam.run(output, KEY_REPORT)
    time.sleep(60)

    return ("Se acaba de ejecutar el proceso de " + KEY_REPORT +
            " Para actualizar desde: " + dateini + " hasta " + dateend)
Example #5
def hello_world(request):
    """Responds to any HTTP request.
    Args:
        request (flask.Request): HTTP request object.
    Returns:
        The response text or any set of values that can be turned into a
        Response object using
        `make_response <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>`.

    """
    paramas = request.get_json()
    showdata = {"success": False}
    import pandas as pd
    import numpy as np

    from google.cloud import storage
    client = storage.Client()
    bucket = client.get_bucket('qwiklabs-gcp-02-81b9a19561ac.appspot.com')
    blob = bucket.get_blob(paramas['name'])  # assumption: the blob name arrives in the request JSON ('file' was undefined here)
    contents = blob.download_as_string()
    print(contents)
    from io import StringIO
    contents = str(contents, "utf-8")
    contents = StringIO(contents)
    df = pd.read_csv(contents)
    print(df.head())
    # Select the 'Open' column while it is still a DataFrame; indexing an
    # ndarray by column name would fail.
    data = df['Open'].to_numpy()
    data = np.reshape(data, (1235, 1))
    from sklearn.preprocessing import MinMaxScaler  # needed for the scaler below
    scaler = MinMaxScaler()
    data = scaler.fit_transform(data)
    data = np.reshape(data, (1235,))
    data_training = data[100:]
    data_test = data[:100]
    data_test = data_test[::-1]
    data_training = data_training[::-1]
    X_train = []
    y_train = []
    for i in range(60, data_training.shape[0]):
        X_train.append(data_training[i - 60:i])
        y_train.append(data_training[i])
    X_train, y_train = np.array(X_train), np.array(y_train)
    print(X_train.shape)
    print(y_train.shape)
    print(X_train.shape[1])
    X_train = np.reshape(X_train, (1075, 60, 1))
    from tensorflow.keras import Sequential
    from tensorflow.keras.layers import Dense, LSTM, Dropout
    regressior = Sequential()
    regressior.add(
        LSTM(units=60,
             activation='relu',
             return_sequences=True,
             input_shape=(X_train.shape[1], 1)))
    regressior.add(Dropout(0.2))
    regressior.add(LSTM(units=60, activation='relu', return_sequences=True))
    regressior.add(Dropout(0.2))
    regressior.add(LSTM(units=80, activation='relu', return_sequences=True))
    regressior.add(Dropout(0.2))
    regressior.add(LSTM(units=120, activation='relu'))
    regressior.add(Dropout(0.2))
    regressior.add(Dense(units=1))
    regressior.compile(optimizer='adam',
                       loss='mean_squared_error',
                       metrics=['accuracy'])
    hist = regressior.fit(X_train, y_train, epochs=8, batch_size=32)
    preddata = data[:59]
    preddata = np.reshape(preddata, (-1, 1))
    preddata = scaler.inverse_transform(preddata)
    preddata = preddata[::-1]
    xdata = []
    xdata.append(paramas['Open'])
    preddata = np.append(preddata, xdata)
    preddata = np.reshape(preddata, (-1, 1))
    preddata = scaler.transform(preddata)
    preddata = np.reshape(preddata, (1, 60, 1))
    predvalue = regressior.predict(preddata)
    tranpred = predvalue
    print(tranpred)
    tranpred = scaler.inverse_transform(tranpred)
    print(tranpred)
    # Return a body so the HTTP function does not respond with None
    return str(tranpred[0][0])
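
The function above retrains the LSTM on every request, which is slow inside an HTTP handler. A sketch of one way to persist the trained model to the same kind of bucket and reload it later, assuming hypothetical object names under a models/ prefix:

def save_model_to_gcs(model, bucket, blob_name='models/lstm_open.h5'):
    # /tmp is the only writable filesystem in Cloud Functions
    local_path = '/tmp/lstm_open.h5'
    model.save(local_path)                       # Keras HDF5 format
    bucket.blob(blob_name).upload_from_filename(local_path)

def load_model_from_gcs(bucket, blob_name='models/lstm_open.h5'):
    from tensorflow.keras.models import load_model
    local_path = '/tmp/lstm_open.h5'
    bucket.blob(blob_name).download_to_filename(local_path)
    return load_model(local_path)
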
Example #6
def clone_lastgreen(args):
  gcs_client = storage.Client()
  sha = get_latest_green_presubmit(gcs_client)

  util.clone_repo(args.src_dir, util.MASTER_REPO_OWNER, util.MASTER_REPO_NAME,
                  sha)
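
get_latest_green_presubmit is not defined in this snippet. A hypothetical sketch, assuming the CI system writes the last passing commit SHA to a small JSON object in a results bucket (the bucket name, object name and field name are all assumptions):

import json

def get_latest_green_presubmit(gcs_client,
                               bucket_name='my-ci-results',      # hypothetical
                               blob_name='latest_green.json'):   # hypothetical
    bucket = gcs_client.get_bucket(bucket_name)
    contents = bucket.blob(blob_name).download_as_string()
    return json.loads(contents)['sha']   # field name is an assumption
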
Example #7
def function_spark(numero_activacion):
    
    # Download the data from the bucket into a local variable
    storage_client = storage.Client()
    bucket = storage_client.get_bucket('tfm-samur-bucket')
    blob = bucket.get_blob('Raw_data/samur_activaciones_'+str(numero_activacion)+'.csv')
    content = blob.download_as_string()

    # Prepare the data so it can be converted into a Spark DataFrame
    data = pd.read_csv(BytesIO(content), index_col=0)
    data = data.values.tolist()

    # Define the schema for our DataFrame
    
    schema = StructType([
        StructField("Año", IntegerType(), True),
        StructField("Mes", StringType(), True),
        StructField("Solicitud", StringType(), True),
        StructField("Intervencion", StringType(), True),
        StructField("codigo", StringType(), True),
        StructField("Distrito", StringType(), True),
        StructField("Hospital", StringType(), True)
        ])

    # Start our Spark application
    
    spark = SparkSession.builder.appName("samur").config("spark.some.config.option", "some-value").getOrCreate()

    # Create the Spark DataFrame
    df = spark.createDataFrame(data, schema)

    # Keep only the rows where the request time is not NaN
    df = df.filter(df.Solicitud != 'NaN')

    # Split the request field to extract the hour
    split_col = pyspark.sql.functions.split(df['Solicitud'], ':')
    df = df.withColumn('Hora', split_col.getItem(0))

    # Convert to a pandas DataFrame so we can iterate and build the array of months
    data = df.toPandas()
    meses = data.Mes.unique()

    # Call the function that sets the days for each month
    data = day_set(data,'Hora')

    # Prepare the DataFrame again so it can be turned back into a Spark DataFrame
    data = data.values.tolist()
    schema = StructType([
        StructField("Año", IntegerType(), True),
        StructField("Mes", StringType(), True),
        StructField("Solicitud", StringType(), True),
        StructField("Intervencion", StringType(), True),
        StructField("Codigo", StringType(), True),
        StructField("Distrito", StringType(), True),
        StructField("Hospital", StringType(), True),
        StructField("Hora_Solicitud", StringType(), True),
        StructField("Dia", StringType(), True)
        ])

    # Create the Spark DataFrame
    df = spark.createDataFrame(data, schema)

    # Convert the column from str to int
    df = df.withColumn("Dia", df["Dia"].cast(IntegerType()))

    # Create a numeric month column ('Mes_num')
    df = df.withColumn("Mes_num", lit(0))
    numero = 1
    for i in meses:
        df = df.withColumn('Mes_num', when(col('Mes') == i,numero).otherwise(col('Mes_num')))
        numero = numero + 1 

    # Create the 'Finalizado' column, used to count the total number of calls per day
    df = df.withColumn("Finalizado", lit(1))
    df = df.withColumn('Finalizado', when(col('Intervencion') != 'NaN', 1).otherwise(0))
    
    # Drop the columns we do not need for this goal;
    # they could still be very useful for other kinds of processing.
    df = df.drop('Hora_Solicitud', 'Mes')
    
    # Create a new date column, useful for experimenting with time-series algorithms
    df = df.withColumn('Fecha_activacion', concat(col('Año'), lit('-'), col('Mes_num'), lit('-'), col('Dia')))
    
    # Reorder the DataFrame columns into a tidier layout
    df = df.select(col('Fecha_activacion'),col('Año'),col('Mes_num'),col('Dia'),col('Solicitud'),
                  col('Intervencion'),col('Distrito'),col('Hospital'),col('Finalizado'))
    
    # Upload the file produced by this first step to Google Cloud Storage.
    upload_file(df, numero_activacion, 1)
    
    # Group by date and district to see the total number of calls per zone each day.
    df = df.select(col('Fecha_activacion'),col('Distrito'),col('Finalizado'))
    df = df.groupBy("Fecha_activacion",'Distrito').sum().orderBy("Fecha_activacion")
    
    # Upload the file produced by this second step to Google Cloud Storage.
    upload_file(df, numero_activacion, 2)
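
upload_file is called twice above but never shown. A hypothetical sketch of what it could look like, converting the Spark DataFrame to CSV through pandas and writing it back to the same bucket (the Processed_data/ object layout is an assumption):

from io import StringIO
from google.cloud import storage

def upload_file(df, numero_activacion, paso):
    buffer = StringIO()
    df.toPandas().to_csv(buffer, index=False)
    client = storage.Client()
    bucket = client.get_bucket('tfm-samur-bucket')   # same bucket the raw data came from
    blob_name = 'Processed_data/samur_{}_paso{}.csv'.format(numero_activacion, paso)  # assumed layout
    bucket.blob(blob_name).upload_from_string(buffer.getvalue(), content_type='text/csv')
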
Example #8
import logging
import requests
import urlparse  # Python 2 module; use urllib.parse on Python 3
import boto3
from django.conf import settings  # assumption: 'settings' here is the Django settings module
from dva.in_memory import redis_client

try:
    from google.cloud import storage
except ImportError:
    logging.exception("Could not import gcloud storage client")
    pass
try:
    S3 = boto3.resource('s3')
except Exception:
    logging.exception("Could not initialize S3")
    pass
try:
    GS = storage.Client()
except Exception:
    # suppress the exception unless GCloud support is really required.
    if settings.MEDIA_BUCKET and settings.CLOUD_FS_PREFIX == 'gs':
        logging.exception("Could not initialize GS client")
    pass

if settings.MEDIA_BUCKET and settings.CLOUD_FS_PREFIX == 's3':
    S3_MODE = True
    GS_MODE = False
    BUCKET = S3.Bucket(settings.MEDIA_BUCKET)
elif settings.MEDIA_BUCKET and settings.CLOUD_FS_PREFIX == 'gs':
    S3_MODE = False
    GS_MODE = True
    BUCKET = GS.get_bucket(settings.MEDIA_BUCKET)
else:
Example #9
with open('../config/gcp.yaml', 'r') as f:
    gcp = yaml.load(f, Loader=yaml.SafeLoader)

with open("../config/functions_args.json") as f:
    function_args_set = json.load(f)

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = gcp['CREDENTIALS_JSON']
project_id = gcp['GCP_PROJECT']
shell = gcp['SHELL']
#account = gcp['FUNCTIONS_ACCOUNT']

p_temp = pathlib.Path('../functions')
function_list = [p for p in p_temp.iterdir() if p.is_dir()]

client = storage.Client(project_id)
bucket_name = gcp['FUNCTION_BUCKET']  # the bucket name comes from the same gcp.yaml config ('conf' was undefined)

bucket = client.get_bucket(bucket_name)
deploy_func_set = []

for deploy_func in function_args_set:
    deploy_func_set.append(list(deploy_func.keys())[0])

for function in function_list:
    if function.name in deploy_func_set:  # check the set built above ('deploy_func' was a stale loop variable)
        print("Not deploying {}".format(function.name))
    else:
        gcloud_command = [
            "gcloud", "functions", "deploy", "--allow-unauthenticated"
        ]
Example #10
def trade_bot(event, context):
    # Get the api key from cloud storage
    storage_client = storage.Client()
    bucket = storage_client.get_bucket('algobot_bucket_1')
    blob = bucket.blob('ameritradekey')
    api_key = blob.download_as_string()

    # Check if the market was open today
    today = datetime.today().astimezone(pytz.timezone("America/New_York"))
    today_fmt = today.strftime('%Y-%m-%d')

    market_url = 'https://api.tdameritrade.com/v1/marketdata/EQUITY/hours'

    params = {'apikey': api_key, 'date': today_fmt}

    request = requests.get(url=market_url, params=params).json()

    try:
        if request['equity']['EQ']['isOpen'] is True:
            # BQ creds
            client = bigquery.Client()

            # Load the historical stock data from BQ
            sql_hist = """
                SELECT
                  symbol,
                  closePrice,
                  date
                FROM
                  `trading-bot-1-283110.equity_data.daily_quote_data`
                """

            df = client.query(sql_hist).to_dataframe()

            # Convert the date column to datetime
            df['date'] = pd.to_datetime(df['date'])

            # Sort by date (ascending) for the momentum calculation
            df = df.sort_values(by='date').reset_index(drop=True)

            # Get the latest date for the data we have
            current_data_date = df['date'].max()

            # Rename the column
            df = df.rename(columns={'closePrice': 'close'})

            # Alpaca creds and api
            blob = bucket.blob('ameritradekey')
            keys = blob.download_as_string()
            keys_str = keys.decode().split(',')
            key_id = keys_str[0]
            secret_key = keys_str[1]

            # Initialize the alpaca api
            base_url = "https://paper-api.alpaca.markets"

            api = tradeapi.REST(key_id, secret_key, base_url, 'v2')

            # Get the current positions from alpaca and create a df
            positions = api.list_positions()

            symbol, qty, market_value = [], [], []

            for each in positions:
                symbol.append(each.symbol)
                qty.append(int(each.qty))
                market_value.append(float(each.market_value))

            df_pf = pd.DataFrame({
                'symbol': symbol,
                'qty': qty,
                'market_value': market_value
            })

            # Current portfolio value
            portfolio_value = round(df_pf['market_value'].sum(), 2)

            # Calculate the momentum and select the stocks to buy
            # Set the variables for the momentum trading strategy
            momentum_window = 125
            minimum_momentum = 40

            # Momentum score function
            def momentum_score(ts):
                x = np.arange(len(ts))
                log_ts = np.log(ts)
                regress = stats.linregress(x, log_ts)
                annualized_slope = (np.power(np.exp(regress[0]), 252) -
                                    1) * 100
                return annualized_slope * (regress[2]**2)

            df['momentum'] = df.groupby('symbol')['close'].rolling(
                momentum_window, min_periods=minimum_momentum).apply(
                    momentum_score).reset_index(level=0, drop=True)

            # Get the top momentum stocks for the period
            # Set the portfolio size we want
            portfolio_size = 10

            # Function to get the momentum stocks we want
            def get_momentum_stocks(df, date, portfolio_size, cash):
                # Filter the df to get the top 10 momentum stocks for the latest day
                df_top_m = df.loc[df['date'] == pd.to_datetime(date)]
                df_top_m = df_top_m.sort_values(
                    by='momentum', ascending=False).head(portfolio_size)

                # Set the universe to the top momentum stocks for the period
                universe = df_top_m['symbol'].tolist()

                # Create a df with just the stocks from the universe
                df_u = df.loc[df['symbol'].isin(universe)]

                # Create the portfolio
                # Pivot to format for the optimization library
                df_u = df_u.pivot_table(index='date',
                                        columns='symbol',
                                        values='close',
                                        aggfunc='sum')

                # Calculate expected returns and sample covariance
                mu = expected_returns.mean_historical_return(df_u)
                S = risk_models.sample_cov(df_u)

                # Optimise the portfolio for maximal Sharpe ratio
                ef = EfficientFrontier(mu, S,
                                       gamma=1)  # Use regularization (gamma=1)
                weights = ef.max_sharpe()
                cleaned_weights = ef.clean_weights()

                # Allocate
                latest_prices = get_latest_prices(df_u)

                da = DiscreteAllocation(cleaned_weights,
                                        latest_prices,
                                        total_portfolio_value=cash)

                allocation = da.lp_portfolio()[0]

                # Put the stocks and the number of shares from the portfolio into a df
                symbol_list = []
                num_shares_list = []

                for symbol, num_shares in allocation.items():
                    symbol_list.append(symbol)
                    num_shares_list.append(num_shares)

                # Now that we have the stocks we want to buy we filter the df for those ones
                df_buy = df.loc[df['symbol'].isin(symbol_list)]

                # Filter for the period to get the closing price
                df_buy = df_buy.loc[df_buy['date'] == date].sort_values(
                    by='symbol')

                # Add in the qty that was allocated to each stock
                df_buy['qty'] = num_shares_list

                # Calculate the amount we own for each stock
                df_buy['amount_held'] = df_buy['close'] * df_buy['qty']
                df_buy = df_buy.loc[df_buy['qty'] != 0]
                return df_buy

            # Call the function
            df_buy = get_momentum_stocks(df=df,
                                         date=current_data_date,
                                         portfolio_size=portfolio_size,
                                         cash=portfolio_value)

            # Figure out which stocks we need to sell

            # Create a list of stocks to sell based on what is currently in our pf
            sell_list = list(
                set(df_pf['symbol'].tolist()) - set(df_buy['symbol'].tolist()))

            def sell_stocks(df, df_pf, sell_list, date):
                # Get the current prices and the number of shares to sell
                df_sell_price = df.loc[df['date'] == pd.to_datetime(date)]

                # Filter
                df_sell_price = df_sell_price.loc[df_sell_price['symbol'].isin(
                    sell_list)]

                # Check to see if there are any stocks in the current ones to buy
                # that are not in the current portfolio. It's possible there may not be any
                if df_sell_price.shape[0] > 0:
                    df_sell_price = df_sell_price[['symbol', 'close']]

                    # Merge with the current pf to get the number of shares we bought initially
                    # so we know how many to sell
                    df_buy_shares = df_pf[['symbol', 'qty']]

                    df_sell = pd.merge(df_sell_price,
                                       df_buy_shares,
                                       on='symbol',
                                       how='left')

                else:
                    df_sell = None

                return df_sell

            # Call the function
            df_sell = sell_stocks(df=df,
                                  df_pf=df_pf,
                                  sell_list=sell_list,
                                  date=current_data_date)

            # Get a list of all stocks to sell i.e. any not in the current df_buy and any diff in qty
            def stock_diffs(df_sell, df_pf, df_buy):
                df_stocks_held_prev = df_pf[['symbol', 'qty']]
                df_stocks_held_curr = df_buy[['symbol', 'qty', 'close']]

                # Inner merge to get the stocks that are the same week to week
                df_stock_diff = pd.merge(df_stocks_held_curr,
                                         df_stocks_held_prev,
                                         on='symbol',
                                         how='inner')

                # Check to make sure not all of the stocks are different compared to what we have in the pf
                if df_stock_diff.shape[0] > 0:
                    # Calculate any difference in positions based on the new pf
                    df_stock_diff['share_amt_change'] = df_stock_diff[
                        'qty_x'] - df_stock_diff['qty_y']

                    # Create df with the share difference and current closing price
                    df_stock_diff = df_stock_diff[[
                        'symbol', 'share_amt_change', 'close'
                    ]]

                    # If there's less shares compared to last week for the stocks that
                    # are still in our portfolio, sell those shares
                    df_stock_diff_sale = df_stock_diff.loc[
                        df_stock_diff['share_amt_change'] < 0]

                    # If there are stocks whose qty decreased,
                    # add the df with the stocks that dropped out of the pf
                    if df_stock_diff_sale.shape[0] > 0:
                        if df_sell is not None:
                            df_sell_final = pd.concat(
                                [df_sell, df_stock_diff_sale], sort=True)
                            # Fill in NaNs in the share amount change column with
                            # the qty of the stocks no longer in the pf, then drop the qty columns
                            df_sell_final['share_amt_change'] = df_sell_final[
                                'share_amt_change'].fillna(
                                    df_sell_final['qty'])
                            df_sell_final = df_sell_final.drop(['qty'], 1)
                            # Turn the negative numbers into positive for the order
                            df_sell_final['share_amt_change'] = np.abs(
                                df_sell_final['share_amt_change'])
                            df_sell_final.columns = df_sell_final.columns.str.replace(
                                'share_amt_change', 'qty')
                        else:
                            df_sell_final = df_stock_diff_sale
                            # Turn the negative numbers into positive for the order
                            df_sell_final['share_amt_change'] = np.abs(
                                df_sell_final['share_amt_change'])
                            df_sell_final.columns = df_sell_final.columns.str.replace(
                                'share_amt_change', 'qty')
                    else:
                        df_sell_final = None
                else:
                    df_sell_final = df_stocks_held_curr

                return df_sell_final

            # Call the function
            df_sell_final = stock_diffs(df_sell=df_sell,
                                        df_pf=df_pf,
                                        df_buy=df_buy)

            # Send the sell order to the api
            if df_sell_final is not None:
                symbol_list = df_sell_final['symbol'].tolist()
                qty_list = df_sell_final['qty'].tolist()
                try:
                    for symbol, qty in list(zip(symbol_list, qty_list)):
                        api.submit_order(symbol=symbol,
                                         qty=qty,
                                         side='sell',
                                         type='market',
                                         time_in_force='day')
                except Exception:
                    pass

            # Buy the stocks that increased in shares compared
            # to last week or any new stocks
            def df_buy_new(df_pf, df_buy):
                # Left merge to get any new stocks or see if they changed qty
                df_buy_new = pd.merge(df_buy, df_pf, on='symbol', how='left')

                # Get the qty we need to increase our positions by
                df_buy_new = df_buy_new.fillna(0)
                df_buy_new[
                    'qty_new'] = df_buy_new['qty_x'] - df_buy_new['qty_y']

                # Filter for only shares that increased
                df_buy_new = df_buy_new.loc[df_buy_new['qty_new'] > 0]
                if df_buy_new.shape[0] > 0:
                    df_buy_new = df_buy_new[['symbol', 'qty_new']]
                    df_buy_new = df_buy_new.rename(columns={'qty_new': 'qty'})
                else:
                    df_buy_new = None

                return df_buy_new

            # Call the function
            df_buy_new = df_buy_new(df_pf=df_pf, df_buy=df_buy)

            # Send the buy order to the api
            if df_buy_new is not None:
                symbol_list = df_buy_new['symbol'].tolist()
                qty_list = df_buy_new['qty'].tolist()
                try:
                    for symbol, qty in list(zip(symbol_list, qty_list)):
                        api.submit_order(symbol=symbol,
                                         qty=qty,
                                         side='buy',
                                         type='market',
                                         time_in_force='day')
                except Exception:
                    pass

            # Log the updated pf
            positions = api.list_positions()

            symbol, qty, market_value = [], [], []

            for each in positions:
                symbol.append(each.symbol)
                qty.append(int(each.qty))

            # New position df
            position_df = pd.DataFrame({'symbol': symbol, 'qty': qty})

            # Add the current date and other info into the portfolio df for logging
            position_df['date'] = pd.to_datetime(today_fmt)
            position_df['strat'] = 'momentum_strat_1'

            # Add the new pf to BQ
            # Format date to match schema
            position_df['date'] = position_df['date'].dt.date

            # Append it to the anomaly table
            dataset_id = 'equity_data'
            table_id = 'strategy_log'

            dataset_ref = client.dataset(dataset_id)
            table_ref = dataset_ref.table(table_id)

            job_config = bigquery.LoadJobConfig()
            job_config.source_format = bigquery.SourceFormat.CSV
            job_config.autodetect = True
            job_config.ignore_unknown_values = True

            job = client.load_table_from_dataframe(position_df,
                                                   table_ref,
                                                   location='US',
                                                   job_config=job_config)

            job.result()

            return 'Success'

        else:
            # Market Not Open Today
            pass

    except KeyError:
        # Not a weekday
        pass
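
The momentum score used above is the annualized slope of a log-price regression weighted by R². A small standalone check on synthetic data (the series length and the 0.1% daily drift are arbitrary choices) helps confirm what the number means:

import numpy as np
from scipy import stats

def momentum_score(ts):
    x = np.arange(len(ts))
    log_ts = np.log(ts)
    regress = stats.linregress(x, log_ts)
    annualized_slope = (np.power(np.exp(regress[0]), 252) - 1) * 100
    return annualized_slope * (regress[2] ** 2)

# A perfectly smooth 0.1% daily uptrend: R^2 is ~1, so the score is just the
# annualized slope, roughly 28-29 (in percent).
prices = 100 * np.exp(0.001 * np.arange(125))
print(momentum_score(prices))
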
Example #11
def data_process(event, context):
    """Background Cloud Function to be triggered by Cloud Storage.
       This generic function logs relevant data when a file is changed.

    Args:
        event (dict):  The dictionary with data specific to this type of event.
                       The `data` field contains a description of the event in
                       the Cloud Storage `object` format described here:
                       https://cloud.google.com/storage/docs/json_api/v1/objects#resource
        context (google.cloud.functions.Context): Metadata of triggering event.
    Returns:
        None; the output is written to Stackdriver Logging
    """
    if event['name'] == "newDataFile":
        tempFilePath = tempfile.mkdtemp()
        storage_client = storage.Client()

        bucket = storage_client.bucket("digital-equity.appspot.com")
        blobNew = bucket.blob("newDataFile")
        blobNew.download_to_filename(tempFilePath + "/newDataFile.json")
        blobOld = bucket.blob("totalDataFile")
        blobOld.download_to_filename(tempFilePath + "/totalDataFile.json")

        # cleaning up the data
        data_array = [
            pd.read_json(tempFilePath + "/newDataFile.json"),
            pd.read_json(tempFilePath + "/totalDataFile.json")
        ]
        if 'School Name' in data_array[0].columns:
            print('School Name column found; merging with the existing data')
            data_array[0] = data_array[0].applymap(lambda x: np.nan
                                                   if not x else x)
            school_names = [d["School Name"] for d in data_array]
            allschoolnames = pd.concat(
                school_names).drop_duplicates().reset_index(drop=True)
            currentYear = pd.merge(data_array[0],
                                   allschoolnames,
                                   on='School Name',
                                   how="outer")
            data_array[0] = currentYear
            currentTempYear = int(currentYear['SY'][0])
            all_merged = pd.concat(data_array)
            all_merged.to_json(tempFilePath + "/totalDataFile2.json",
                               orient='records')
            tempYear = int(all_merged.at[len(data_array[1]) - 1, 'SY'])
            renameYear = max(currentTempYear, tempYear)
            # renaming the old
            bucket.rename_blob(blobOld, "totalDataFile" + str(renameYear))

            # uploading new file
            updatedTotal = bucket.blob("totalDataFile")
            updatedTotal.upload_from_filename(tempFilePath +
                                              "/totalDataFile2.json")

            # redeployment
            url = "https://api.github.com/repos/mbae-org/spark-digital-equity/actions/workflows/build.yml/dispatches"

            payload = "{\n    \"inputs\": {\n        \"downloadURL\": \"https://storage.googleapis.com/digital-equity.appspot.com/totalDataFile \"\n    },\n    \"ref\": \"master\"\n}"
            headers = {
                'Content-Type': 'application/json',
                'Accept': 'application/vnd.github.v3+json',
                'Authorization': 'Bearer ' + os.environ.get('access_token')
            }

            response = requests.request("POST",
                                        url,
                                        headers=headers,
                                        data=payload)
            print(response)

    else:
        print("did not update")
Example #12
def upload_photo():
    photo = request.files['file']

    name = ''.join(random.choices(string.ascii_uppercase + string.digits, k=6))
    # Create a Cloud Storage client.
    storage_client = storage.Client()

    # Get the bucket that the file will be uploaded to.
    bucket = storage_client.get_bucket(CLOUD_STORAGE_BUCKET)

    # Create a new blob and upload the file's content.
    blob = bucket.blob(name + '.jpg')
    blob.upload_from_string(photo.read(), content_type=photo.content_type)

    # Make the blob publicly viewable.
    blob.make_public()

    audioname = ''.join(
        random.choices(string.ascii_uppercase + string.digits, k=6))

    tts_client = texttospeech.TextToSpeechClient()

    clarifaiClient = ClarifaiApp()

    model = clarifaiClient.models.get('foodcam')
    out = model.predict_by_url(blob.public_url)
    best = str(out['outputs'][0]['data']['concepts'][0]['id'])

    intext = texttospeech.types.SynthesisInput(text=best)

    voice = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)

    # Select the type of audio file you want returned
    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    outspeech = tts_client.synthesize_speech(intext, voice, audio_config)

    with open('output.mp3', 'wb') as out:
        # Write the response to the output file.
        out.write(outspeech.audio_content)

    speechblob = bucket.blob(audioname + '.mp3')
    speechblob.upload_from_filename('output.mp3')

    speechblob.make_public()

    # Create a Cloud Datastore client.
    datastore_client = datastore.Client()

    # Fetch the current date / time.
    current_time = datetime.now()
    current_datetime = current_time.timestamp()

    # The kind for the new entity.
    kind = 'Images'

    # The name/ID for the new entity.
    name = blob.name

    # Create the Cloud Datastore key for the new entity.
    key = datastore_client.key(kind, name)

    # Construct the new entity using the key. Set dictionary values for the
    # entity keys blob_name, image_public_url, timestamp, and best.
    entity = datastore.Entity(key)
    entity['blob_name'] = blob.name
    entity['image_public_url'] = blob.public_url
    entity['timestamp'] = current_datetime
    entity['best'] = best

    # Save the new entity to Datastore.
    datastore_client.put(entity)

    kind1 = 'Audio'

    # The name/ID for the new entity.
    name1 = speechblob.name

    # Create the Cloud Datastore key for the new entity.
    key1 = datastore_client.key(kind1, name1)

    # Construct the new entity using the key. Set dictionary values for the
    # entity keys blob_name, speech_public_url, and timestamp.
    entity1 = datastore.Entity(key1)
    entity1['blob_name'] = speechblob.name
    entity1['speech_public_url'] = speechblob.public_url
    entity1['timestamp'] = current_datetime

    # Save the new entity to Datastore.
    datastore_client.put(entity1)

    # Redirect to the home page.
    return redirect('/results')
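
The texttospeech.types and texttospeech.enums namespaces used above belong to the pre-1.0 client library. A sketch of what the same synthesis call looks like against google-cloud-texttospeech 2.x, where those namespaces were flattened and the request fields are passed as keyword arguments:

from google.cloud import texttospeech

def synthesize(text):
    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config)
    return response.audio_content   # MP3 bytes
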
Example #13
def main(platform_id, platform, args, config):
    loggingLevel = getattr(logging, args.log.upper(), None)
    logging.basicConfig(level=loggingLevel)
    logger = logging.getLogger()

    print('PLATFORM_ID:', platform_id)
    print('PLATFORM INFO:', platform)

    if args.path:
        print('Running tests in path: %s' % args.path)
    else:
        print('Running all tests!')

    if args.upload:
        print('Setting up storage client')
        from google.cloud import storage
        storage_client = storage.Client(project='wptdashboard')
        bucket = storage_client.get_bucket(config['gs_results_bucket'])
        verify_gsutil_installed(config)

    if args.create_testrun:
        assert len(config['secret']) == 64, (
            'Valid secret required to create TestRun')

    if not platform.get('sauce'):
        if platform['browser_name'] == 'chrome':
            browser_binary = config['chrome_binary']
        elif platform['browser_name'] == 'firefox':
            browser_binary = config['firefox_binary']

        if platform['browser_name'] == 'chrome':
            verify_browser_binary_version(platform, browser_binary)
        verify_os_name(platform)
        verify_or_set_os_version(platform)

    print('Platform information:')
    print('Browser version: %s' % platform['browser_version'])
    print('OS name: %s' % platform['os_name'])
    print('OS version: %s' % platform['os_version'])

    print('==================================================')
    print('Setting up WPT checkout')

    wpt_sha = setup_wpt(args, platform, config, logger)

    print('Current WPT SHA: %s' % wpt_sha)

    return_code = subprocess.check_call(['git', 'checkout', wpt_sha],
                                        cwd=config['wpt_path'])
    assert return_code == 0, (
        'Got non-0 return code: %d from git checkout' % return_code)

    short_wpt_sha = wpt_sha[0:10]

    abs_report_log_path = "%s/wptd-%s-%s-report.log" % (
        config['build_path'], short_wpt_sha, platform_id)

    sha_summary_gz_path = '%s/%s-summary.json.gz' % (short_wpt_sha,
                                                     platform_id)
    abs_sha_summary_gz_path = "%s/%s" % (config['build_path'],
                                         sha_summary_gz_path)

    gs_results_base_path = "%s/%s/%s" % (config['build_path'], short_wpt_sha,
                                         platform_id)
    gs_results_url = 'https://storage.googleapis.com/%s/%s' % (
        config['gs_results_bucket'], sha_summary_gz_path)

    print('==================================================')
    print('Running WPT')

    if platform.get('sauce'):
        if platform['browser_name'] == 'edge':
            sauce_browser_name = 'MicrosoftEdge'
        else:
            sauce_browser_name = platform['browser_name']

        command = [
            './wpt',
            'run',
            'sauce:%s:%s' % (sauce_browser_name, platform['browser_version']),
            '--sauce-platform=%s' % platform['os_name'],
            '--sauce-key=%s' % config['sauce_key'],
            '--sauce-user=%s' % config['sauce_user'],
            '--sauce-connect-binary=%s' % config['sauce_connect_path'],
            '--sauce-tunnel-id=%s' % config['sauce_tunnel_id'],
            '--no-restart-on-unexpected',
            '--processes=2',
            '--run-by-dir=3',
        ]
        if args.path:
            command.insert(3, args.path)
    else:
        command = [
            'xvfb-run',
            '--auto-servernum',
            './wpt',
            'run',
            platform['browser_name'],
        ]

        if args.path:
            command.insert(5, args.path)
        if platform['browser_name'] == 'chrome':
            command.extend(['--binary', browser_binary])
        if platform['browser_name'] == 'firefox':
            command.extend(['--install-browser', '--yes'])
            command.append('--certutil-binary=certutil')
            # temporary fix to allow WebRTC tests to call getUserMedia
            command.extend(['--setpref', 'media.navigator.streams.fake=true'])

    command.append('--log-mach=-')
    command.extend(['--log-wptreport', abs_report_log_path])
    command.append('--install-fonts')

    return_code = subprocess.call(command, cwd=config['wpt_path'])

    print('==================================================')
    print('Finished WPT run')
    print('Return code from wptrunner: %s' % return_code)

    if platform['browser_name'] == 'firefox':
        print('Verifying installed firefox matches platform ID')
        firefox_path = '%s/_venv/firefox/firefox' % config['wpt_path']
        verify_browser_binary_version(platform, firefox_path)

    with open(abs_report_log_path) as f:
        report = json.load(f)

    assert len(report['results']) > 0, (
        '0 test results, something went wrong, stopping.')

    summary = report_to_summary(report)

    print('==================================================')
    print('Writing summary.json.gz to local filesystem')
    write_gzip_json(abs_sha_summary_gz_path, summary)
    print('Wrote file %s' % abs_sha_summary_gz_path)

    print('==================================================')
    print('Writing individual result files to local filesystem')
    for result in report['results']:
        test_file = result['test']
        filepath = '%s%s' % (gs_results_base_path, test_file)
        write_gzip_json(filepath, result)
        print('Wrote file %s' % filepath)

    if not args.upload:
        print('==================================================')
        print('Stopping here (pass --upload to upload results to WPTD).')
        return

    print('==================================================')
    print('Uploading results to gs://%s' % config['gs_results_bucket'])
    command = [
        'gsutil', '-m', '-h', 'Content-Encoding:gzip', 'rsync', '-r',
        short_wpt_sha,
        'gs://%s/%s' % (config['gs_results_bucket'], short_wpt_sha)
    ]
    return_code = subprocess.check_call(command, cwd=config['build_path'])
    assert return_code == 0
    print('Successfully uploaded!')
    print('HTTP summary URL: %s' % gs_results_url)

    if not args.create_testrun:
        print('==================================================')
        print('Stopping here '
              '(pass --create-testrun to create and promote this TestRun).')
        return

    print('==================================================')
    print('Creating new TestRun in the dashboard...')
    url = '%s/api/run' % config['wptd_prod_host']
    response = requests.post(url,
                             params={'secret': config['secret']},
                             data=json.dumps({
                                 'browser_name':
                                 platform['browser_name'],
                                 'browser_version':
                                 platform['browser_version'],
                                 'os_name':
                                 platform['os_name'],
                                 'os_version':
                                 platform['os_version'],
                                 'revision':
                                 short_wpt_sha,
                                 'results_url':
                                 gs_results_url
                             }))
    if response.status_code == 201:
        print('Run created!')
    else:
        print('There was an issue creating the TestRun.')

    print('Response status code:', response.status_code)
    print('Response text:', response.text)
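The helper write_gzip_json called above is not defined in this snippet; a minimal sketch of what it plausibly does, matching the call signature seen above (an assumption, not the original implementation):

import gzip
import json
import os


def write_gzip_json(filepath, payload):
    # Create parent directories as needed, then write gzip-compressed JSON.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with gzip.open(filepath, 'wt', encoding='utf-8') as f:
        json.dump(payload, f)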
Example #14
0
def post_audio():
    if request.method == 'GET':
        # =========================ElasticSearch API=========================
        elasticHeaders = {
            'Content-Type': 'application/json',
        }
        elastic = {
            "productid": "bike4",
            "eng_desc": "how old is the Brooklyn Bridge",
            "ratings": "3"
        }
        elasticData = json.dumps(elastic)

        # data = open('request.json', 'rb').read() #json request file required
        elasticResponse = requests.get(
            'http://104.198.254.220:9200/_search?q=bike')
        # a = json.loads(response.text)

        return elasticResponse.text

    if request.method == 'POST':
        # get data using flask
        blob = request.get_data()
        # print (request.form['file'])
        # print "======"
        # print(request.data)

        # Open file and write binary (blob) data
        with open('./audio.wav', 'wb') as f:
            f.write(request.data)

        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'credentials.json'
        params = (('key', 'AIzaSyAxgxicufBuHtEMsqScWdu4Uaivs0Laox4'), )

        # =========================Storage API=========================
        # storage REST api
        # storageAPI = 'https://www.googleapis.com/storage/v1/b/canadiantired/o/test.wav'
        # storageResponse = requests.get(url=storageAPI, params=params)
        # print storageResponse.text

        # Instantiates a client
        storage_client = storage.Client()

        # The name of the bucket; creating it is left commented out below
        bucket_name = 'canadiantired'

        # bucket = storage_client.create_bucket(bucket_name)
        # print('Bucket {} created.'.format(bucket.name))

        def upload_blob(bucket_name, source_file_name, destination_blob_name):
            """Uploads a file to the bucket."""
            storage_client = storage.Client()
            bucket = storage_client.get_bucket(bucket_name)
            blob = bucket.blob(destination_blob_name)

            blob.upload_from_filename(source_file_name)
            blob.make_public()

            print('Blob {} is publicly accessible at {}'.format(
                blob.name, blob.public_url))

            print('File {} uploaded to {}.'.format(source_file_name,
                                                   destination_blob_name))

        upload_blob(bucket_name, 'audio.wav', 'audio.wav')

        # =========================Speech Data API=========================
        # Here down works

        speechHeaders = {'Content-Type': 'application/json'}
        # speech = {
        #     "config": {
        #         "encoding":"LINEAR16",
        #         "sample_rate": 16000,
        #         "language_code": "en-US"
        #     },
        #     "audio": {
        #         "uri":"gs://canadiantired/audio.wav"
        #     }
        # }

        speech = {
            "config": {
                "encoding": "FLAC",
                "sample_rate": 16000,
                "language_code": "en-US"
            },
            "audio": {
                "uri": "gs://cloud-samples-tests/speech/brooklyn.flac"
            }
        }

        speechData = json.dumps(speech)
        speechAPI = 'https://speech.googleapis.com/v1beta1/speech:syncrecognize'

        speechResponse = requests.post(url=speechAPI,
                                       data=speechData,
                                       params=params,
                                       headers=speechHeaders)
        # print(speechResponse.status_code, speechResponse.reason, speechResponse.text)

        # =========================ElasticSearch API=========================
        elasticHeaders = {
            'Content-Type': 'application/json',
        }
        elastic = {
            "productid": "bike4",
            "eng_desc": "how old is the Brooklyn Bridge",
            "ratings": "3"
        }

        elasticData = json.dumps(elastic)
        # data = open('request.json', 'rb').read() #json request file required
        elasticResponse = requests.get(
            'http://104.198.254.220:9200/_search?q=bike')
        # a = json.loads(response.text)

        print(elasticResponse)
        return elasticResponse.text
def get_blob_from_path(path):
    bucket_name = re.search("dataproc.+?/", path).group(0)[0:-1]
    bucket = storage.Client().get_bucket(bucket_name)
    output_location = re.search("google-cloud-dataproc.+", path).group(0)
    return bucket.blob(output_location)
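A hypothetical usage sketch for get_blob_from_path; the path below is an illustrative placeholder, not taken from the original code:

def print_dataproc_driver_output(path):
    # e.g. path = ('gs://dataproc-staging-bucket/'
    #              'google-cloud-dataproc-metainfo/cluster-id/jobs/job-1/driveroutput')
    blob = get_blob_from_path(path)
    print(blob.download_as_string().decode('utf-8'))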
Example #16
0
    configPath = "config.ini"
    try:
        # Loading app configuration from config.ini
        # Required by gcloud framework
        # Loading respective collection path from config.ini
        # Loading storage path to fetch images
        if not parser.read(configPath):
            raise FileNotFoundError(configPath)
        filePath = parser["CLOUD_CONFIG"].get("SFP")
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath(filePath)
        projectName = parser["CLOUD_CONFIG"].get("PJN")
        devId = parser["APP_CONFIG"].get("CameraName")

        # Check internet before doing this 
        db = firestore.Client()
        camera_collection = db.collection(parser["CLOUD_CONFIG"].get("CAM"))
        intruder_collection = db.collection(parser["CLOUD_CONFIG"].get("LOG"))
        facedata_collection = db.collection(parser["CLOUD_CONFIG"].get("FAD"))
        trainface_collection = db.collection(parser["CLOUD_CONFIG"].get("TFD"))

        image_bucket = storage.Client()
        bucket = image_bucket.get_bucket(projectName + ".appspot.com")

        trainface_notifier = trainface_collection.on_snapshot(new_face_added)
        if trainface_notifier._closed:
            trainface_notifier = trainface_collection.on_snapshot(new_face_added)

    except FileNotFoundError:
        # Raised when the config file is missing or unreadable
        print("Error loading config file\nExiting")
        exit(1)
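new_face_added is referenced above but not defined in this snippet; a minimal sketch of a Firestore on_snapshot callback with the expected (snapshots, changes, read_time) signature — the body is an assumption:

def new_face_added(doc_snapshots, changes, read_time):
    # Called by the Firestore client on every change to the watched collection.
    for change in changes:
        if change.type.name == 'ADDED':
            print('New face document added:', change.document.id)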
Example #17
0
def build_and_push_artifacts(go_dir, src_dir, registry, publish_path=None,
                             gcb_project=None, build_info_path=None):
  """Build and push the artifacts.

  Args:
    go_dir: The GOPATH directory
    src_dir: The root directory where we checked out the repo.
    registry: Docker registry to use.
    publish_path: (Optional) The GCS path where artifacts should be published.
       Set to none to only build locally.
    gcb_project: The project to use with GCB to build docker images.
      If set to none uses docker to build.
    build_info_path: (Optional): GCS location to write YAML file containing
      information about the build.
  """
  # Update the GOPATH to the temporary directory.
  env = os.environ.copy()
  if go_dir:
    env["GOPATH"] = go_dir

  bin_dir = os.path.join(src_dir, "bin")
  if not os.path.exists(bin_dir):
    os.makedirs(bin_dir)

  build_info = build_operator_image(src_dir, registry, project=gcb_project)

  # Copy the chart to a temporary directory because we will modify some
  # of its YAML files.
  chart_build_dir = tempfile.mkdtemp(prefix="tmpTfJobChartBuild")
  shutil.copytree(os.path.join(src_dir, "tf-job-operator-chart"),
                  os.path.join(chart_build_dir, "tf-job-operator-chart"))
  version = build_info["image"].split(":")[-1]
  values_file = os.path.join(chart_build_dir, "tf-job-operator-chart",
                             "values.yaml")
  update_values(values_file, build_info["image"])

  chart_file = os.path.join(chart_build_dir, "tf-job-operator-chart",
                            "Chart.yaml")
  update_chart(chart_file, version)

  # Delete any existing matches because we assume there is only 1 below.
  matches = glob.glob(os.path.join(bin_dir, "tf-job-operator-chart*.tgz"))
  for m in matches:
    logging.info("Delete previous build: %s", m)
    os.unlink(m)

  util.run(["helm", "package", "--save=false", "--destination=" + bin_dir,
            "./tf-job-operator-chart"], cwd=chart_build_dir)

  matches = glob.glob(os.path.join(bin_dir, "tf-job-operator-chart*.tgz"))

  if len(matches) != 1:
    raise ValueError(
      "Expected 1 chart archive to match but found {0}".format(matches))

  chart_archive = matches[0]

  release_path = version

  targets = [
    os.path.join(release_path, os.path.basename(chart_archive)),
    "latest/tf-job-operator-chart-latest.tgz",
  ]

  if publish_path:
    gcs_client = storage.Client(project=gcb_project)
    bucket_name, base_path = util.split_gcs_uri(publish_path)
    bucket = gcs_client.get_bucket(bucket_name)
    for t in targets:
      blob = bucket.blob(os.path.join(base_path, t))
      gcs_path = util.to_gcs_uri(bucket_name, blob.name)
      if not t.startswith("latest"):
        build_info["helm_chart"] = gcs_path
      if blob.exists() and not t.startswith("latest"):
        logging.warning("%s already exists", gcs_path)
        continue
      logging.info("Uploading %s to %s.", chart_archive, gcs_path)
      blob.upload_from_filename(chart_archive)

    create_latest(bucket, build_info["commit"],
                  util.to_gcs_uri(bucket_name, targets[0]))

  # Always write to the bin dir.
  paths = [os.path.join(bin_dir, "build_info.yaml")]

  if build_info_path:
    paths.append(build_info_path)

  write_build_info(build_info, paths, project=gcb_project)
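The util.split_gcs_uri and util.to_gcs_uri helpers used above come from the project's own util module and are not shown here; minimal sketches of what they plausibly do (assumptions, not the actual implementations):

def split_gcs_uri(gcs_uri):
    # 'gs://bucket/some/path' -> ('bucket', 'some/path')
    path = gcs_uri[len('gs://'):]
    bucket_name, _, blob_path = path.partition('/')
    return bucket_name, blob_path


def to_gcs_uri(bucket_name, blob_path):
    return 'gs://{0}/{1}'.format(bucket_name, blob_path)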
Example #18
0
    else:
        print('blob {} is not outdated, not uploading'.format(blob.name))


def upload_entry(blob, dbx, entry):
    print('downloading from dropbox:', entry.name)
    dbx.files_download_to_file(entry.name, entry.id)
    print('uploading to GCS:', 'ELVOs_anon/' + entry.name)
    blob.upload_from_filename(entry.name)
    os.remove(entry.name)


if __name__ == '__main__':
    dbx = dropbox.Dropbox(os.environ['DROPBOX_TOKEN'])

    gcs_client = storage.Client(project='elvo-198322')
    bucket = gcs_client.get_bucket('elvos')

    results = dbx.files_list_folder('id:ROCtfi_cdqAAAAAAAAB7Uw')

    if results.has_more:
        raise RuntimeError('has_more=True is currently not supported')

    for entry in results.entries:
        # TODO(#102): Explain '7_11 Redownloaded Studies'
        if entry.name in ('7_17 New ELVOs',):
            print('uploading files in folder:', entry.name)
            subdir_results = dbx.files_list_folder(entry.id)
            for e in subdir_results.entries:
                upload_entry_if_outdated(e, dbx, bucket)
        elif isinstance(entry, FolderMetadata):
Example #19
0
def upload_photo():
    # Create a Cloud Storage client.
    storage_client = storage.Client()

    # Get the Cloud Storage bucket that the file will be uploaded to.
    bucket = storage_client.get_bucket(os.environ.get('BUCKET'))

    # Create a new blob and upload the file's content to Cloud Storage.
    # image_b64 = request.values['file']
    # image_data = re.sub('^data:image/.+;base64,', '', image_b64).decode('base64')
    # image_PIL = Image.open(StringIO(image_b64))
    #
    # image_PIL.save("image.png")

    # print("fml")
    # print(request.data)

    dataDict = json.loads(request.data.decode(encoding='UTF-8'))

    print(dataDict['weirdImg'])

    imgData = urllib.parse.unquote(dataDict['weirdImg'])
    print(imgData)

    blob = bucket.blob("image.png")
    blob.upload_from_string(imgData)
    print("Got to spot 1")
    # Make the blob publicly viewable.
    blob.make_public()
    image_public_url = blob.public_url
    print("Got to spot 2")

    # Create a Cloud Vision client.
    vision_client = vision.ImageAnnotatorClient()

    # Retrieve a Vision API response for the uploaded photo. The image bytes
    # are sent inline below; source_uri points at the copy in Cloud Storage
    # but is not used in this request.
    source_uri = 'gs://{}/{}'.format(os.environ.get('BUCKET'), blob.name)
    response = vision_client.annotate_image({
        'image': {
            'content': imgData.encode()
        },
    })

    print(response)

    print("Got to spot 3")
    labels = response.label_annotations
    faces = response.face_annotations
    web_entities = response.web_detection.web_entities
    print("Got to spot 4")

    # Create a Cloud Datastore client
    datastore_client = datastore.Client()

    # The kind for the new entity
    kind = 'Photos'

    # The name/ID for the new entity
    name = blob.name

    # Create the Cloud Datastore key for the new entity
    key = datastore_client.key(kind, name)
    print("Got to spot 5")

    # Construct the new entity using the key. Set dictionary values for entity
    # keys image_public_url and label.
    # entity = datastore.Entity(key)
    # entity['image_public_url'] = image_public_url
    # #labels[0].description = "description"
    # entity['label'] = labels[0].description
    # print("Got to spot 6")
    #
    # # Save the new entity to Datastore
    # datastore_client.put(entity)
    # print("Got to spot 7")

    # Redirect to the home page.
    emotions = []
    for face in faces:
        emotions += [face.joy_likelihood]
        emotions += [face.sorrow_likelihood]
        emotions += [face.anger_likelihood]
        emotions += [face.surprise_likelihood]
        emotions += [face.headwear_likelihood]
    if len(emotions) > 0:
        emojifinal = num_to_emoji(emotions)
    else:
        print("no face")
        emojifinal = "No face detected"

    return render_template('homepage.html',
                           labels=labels,
                           faces=faces,
                           web_entities=web_entities,
                           public_url=image_public_url,
                           emojifinal=emojifinal)
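num_to_emoji is called above but not defined in this snippet; a minimal illustrative sketch, assuming it maps the Vision API likelihood scores (0=UNKNOWN .. 5=VERY_LIKELY) gathered above to a display string — not the original helper:

def num_to_emoji(emotions):
    # emotions is a flat list of likelihood scores in the order
    # joy, sorrow, anger, surprise, headwear (repeated per face).
    joy, sorrow, anger, surprise, headwear = emotions[:5]
    strongest = max(emotions[:5])
    if strongest == joy:
        return '😄'
    if strongest == sorrow:
        return '😢'
    if strongest == anger:
        return '😠'
    if strongest == surprise:
        return '😲'
    return '🎩'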
Example #20
0
def Ejecutar():

    reload(sys)
    sys.setdefaultencoding('utf8')
    storage_client = storage.Client()
    bucket = storage_client.get_bucket('ct-telefonia')
    gcs_path = 'gs://ct-telefonia'
    sub_path = KEY_REPORT + '/'
    output = gcs_path + "/" + sub_path + fecha + ext
    blob = bucket.blob(sub_path + fecha + ext)
    dateini = request.args.get('dateini')
    dateend = request.args.get('dateend')

    if dateini is None:
        dateini = GetDate1
    else:
        dateini = dateini + hour1

    if dateend is None:
        dateend = GetDate2
    else:
        dateend = dateend + hour2

    client = bigquery.Client()
    QUERY = (
        'SELECT servidor, operacion, token, ipdial_code, id_cliente, cartera FROM telefonia.parametros_ipdial where Estado = "Activado" order by ipdial_code asc '
    )  #WHERE ipdial_code = "intcob-unisabaneta"
    #  'SELECT servidor, operacion, token, ipdial_code, id_cliente, cartera FROM telefonia.parametros_ipdial where ipdial_code = "intcob-banco-sufi-cast"')
    query_job = client.query(QUERY)
    rows = query_job.result()
    data = ""

    try:
        os.remove(ruta_completa)  # Delete from aries
    except OSError:
        print("Already removed from aries")

    try:
        blob.delete()  # Delete from storage
    except Exception:
        print("Already removed from storage")

    try:
        QUERY2 = (
            'delete FROM `contento-bi.telefonia.detalle_abandono` where cast(substr(date,0,10)as date) = '
            + '"' + dateini[0:4] + '-' + dateini[4:-8] + '-' + dateini[6:-6] +
            '"')
        query_job = client.query(QUERY2)
        rows2 = query_job.result()
    except Exception:
        print("Already removed from bigquery")

    file = open(ruta_completa, "a")
    for row in rows:
        url = 'http://' + str(
            row.servidor
        ) + '/ipdialbox/api_reports.php?token=' + row.token + '&report=' + str(
            CODE_REPORT) + '&date_ini=' + dateini + '&date_end=' + dateend
        # datos = requests.get(url).content
        try:
            datos = requests.get(url).content
        except requests.exceptions.RequestException as error:
            # print('error connecting to ' + row.ipdial_code + ': ' + str(error))
            continue

        if len(datos) < 70:
            continue
        else:
            i = json.loads(datos)
            for rown in i:
                file.write(
                    str(rown["ID"]).encode('utf-8') + "|" +
                    str(rown["QUEUE"]).encode('utf-8') + "|" +
                    str(rown["DATE"]).encode('utf-8') + "|" +
                    str(rown["RESULT"]).encode('utf-8') + "|" +
                    str(rown["ANI"]).encode('utf-8') + "|" +
                    str(rown["ABANDONTIME"]).encode('utf-8') + "|" +
                    str(rown["TYPE_OF_INTERACTION"]).encode('utf-8') + "|" +
                    str(row.id_cliente) + "|" + str(row.ipdial_code) + "|" +
                    str(row.cartera).encode('utf-8') + "\n")

    file.close()
    blob.upload_from_filename(ruta_completa)
    time.sleep(10)
    ejecutar = detalle_abandono_beam.run(output, KEY_REPORT)
    time.sleep(60)

    return ("The " + KEY_REPORT + " process has just been executed, "
            "updating from: " + dateini + " to " + dateend)
Example #21
0
WEB_API_KEY = "AIzaSyCwvUgLW2pKUta-Me4oMi-JYumzAfavtcs"
user_auth = firebase_user_auth.initialize(WEB_API_KEY)
# keeping already watching list
chats_watch_list = {}



# initializes fb with bucket name
cred = credentials.Certificate(CONFIG)
default_app = firebase_admin.initialize_app(cred,{
    'storageBucket': 'fevici.appspot.com'
})
# configure buckets

credentials = service_account.Credentials.from_service_account_info(CONFIG)
client = storage.Client(project='fevici', credentials=credentials)

bucket = client.get_bucket('fevici.appspot.com')


# Used to test how to save files
# blob = bucket.blob('my-test-file.txt')
# blob.upload_from_string('this is test content!')

# Used to list the blobs that exist
# for blob in client.list_blobs('fevici.appspot.com', prefix='abc/myfolder'):  # with a prefix
all_projects = [blob.name for blob in client.list_blobs('fevici.appspot.com')]
# print(all_projects)


#Db references
Example #22
0
#cos = boto3.resource('s3',
#region_name='nyc3',
#endpoint_url='https://onestoop00001.nyc3.digitaloceanspaces.com',
#aws_access_key_id=app.config['SPACES_KEY'],
#aws_secret_access_key=app.config['SPACES_SECRET'])

#try:
#buckets = cos.buckets.all()
#for bucket in buckets:
#print("Bucket Name: {0}".format(bucket.name))
#except ClientError as be:
#print("CLIENT ERROR: {0}\n".format(be))
#except Exception as e:
#print("Unable to retrieve list buckets: {0}".format(e))

gcsClient = storage.Client()
#gcsBucket = gcsClient.get_bucket('onestoopimages01')

from firebase_admin import firestore

DB = firestore.client()

#client = SearchClient.create(app.config['ALGOLIA_CONFIG']["appId"],
#app.config['ALGOLIA_CONFIG']["searchKey"])
#searchIndex = client.init_index(app.config['ALGOLIA_CONFIG']["index"])
#searchIndex.set_settings({'attributesForFaceting': ['visibility']})

if (app.debug):
    from werkzeug.debug import DebuggedApplication
    app.wsgi_app = DebuggedApplication(app.wsgi_app, True)
Example #23
0
def get_logs(log_bucket, strides_ip, strides_port):
    storage_client = storage.Client()

    bucket = storage_client.bucket(log_bucket)

    format = [
        "time_micros",
        "c_ip",
        "c_ip_type",
        "c_ip_region",
        "cs_method",
        "cs_uri",
        "sc_status",
        "cs_bytes",
        "sc_bytes",
        "time_taken_micros",
        "cs_host",
        "cs_referer",
        "cs_user_agent",
        "s_request_id",
        "cs_operation",
        "cs_bucket",
        "cs_object",
    ]

    log_files = bucket.list_blobs()

    conn = http.client.HTTPConnection(strides_ip, port=strides_port)

    with dbm.open("gs_agent_blast_hackathon.db", "c") as db:
        for log_file in log_files:
            if "_usage_" not in log_file.public_url:
                print(f"skipping {log_file.public_url}\n")
                continue

            if log_file.public_url in db:
                print(f"{log_file.public_url} already processed\n")
                continue

            logs = log_file.download_as_string()
            body = ""
            lines = logs.split(b"\n")
            for line in lines[1:]:
                line = line.decode()
                print("line is:" + line)
                if len(line) < 50:
                    continue
                line = line.replace("\t", "")
                csvs = csv.reader([str(line)])
                for row in csvs:
                    cols = row

                print("cols is " + str(cols))
                c = 0
                fields = {}
                for col in format:
                    fields[col] = cols[c].replace('"', "")
                    c += 1
                print(fields)

                start = float(fields["time_micros"])
                end = start + float(fields["time_taken_micros"])
                bytes = int(fields["sc_bytes"])
                if (bytes == 0 or fields["cs_method"] != "GET"
                        or int(fields["sc_status"]) >= 400):
                    continue

                start /= 1_000_000.0
                end /= 1_000_000.0

                # /download/storage/v1/b/ncbi_sra_realign/o/ERR1620370.summary?generation=1545190393681782&alt=media
                acc = fields["cs_uri"]
                acc = acc.split("?")[0]
                acc = acc.split("/")[-1]
                # Christiam suggests ignoring extension
                acc = acc.rsplit(".", 1)[0]
                if acc == "o":
                    print("Huh" + fields["cs_uri"])
                tsv = (
                    fields["c_ip"],
                    acc,
                    fields["cs_user_agent"] + " (hackathon)",
                    fields["sc_status"],
                    fields["cs_host"],
                    str(start),
                    str(end),
                    str(bytes),
                    "1",
                )

                body += "\t".join(tsv) + "\n"
            print("\bPosting: " + body)
            conn.request("POST", "/blast_tsv", body=body)
            response = conn.getresponse()
            o = response.read().decode()
            o.replace("\n", "")
            print("HTTP Response was", response.status, response.reason, o)

            db[log_file.public_url] = ""
Example #24
0
def CFCorp():  # data, context
    creds = None
    storage_client = storage.Client()
    if storage_client.get_bucket('hc_tokens_scripts').blob(
            'Tokens/Reporting-token.pickle').exists():
        with gcsfs.GCSFileSystem(project="hireclix").open(
                'hc_tokens_scripts/Tokens/Reporting-token.pickle',
                'rb') as token:
            creds = pickle.load(token)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        with gcsfs.GCSFileSystem(project="hireclix").open(
                'hc_tokens_scripts/Tokens/Reporting-token.pickle',
                'wb') as token:
            pickle.dump(creds, token)

    service = build('gmail', 'v1', credentials=creds)
    userId = 'me'
    labelid = 'Label_34715380858048669'
    query = '*****@*****.**'
    messages = service.users().messages().list(userId=userId,
                                               q=query,
                                               labelIds=labelid).execute()

    def multiple_replace(dict, text):
        # Create a regular expression  from the dictionary keys
        regex = re.compile("(%s)" % "|".join(map(re.escape, dict.keys())))
        # For each match, look-up corresponding value in dictionary
        return regex.sub(lambda mo: dict[mo.string[mo.start():mo.end()]],
                         text).strip()

    regexes = {' ': '', '-': '', '(BL)': ''}

    for m_id in messages['messages']:
        messagemeta = service.users().messages().get(userId=userId,
                                                     id=m_id['id']).execute()

        dates = parser.parse(
            re.sub(
                "^.*,|-.*$", "",
                messagemeta['payload']['headers'][1]['value']).strip()).date()

        today = datetime.today().date()

        if dates == today:

            attachment = messagemeta['payload']['parts'][1]['body'][
                'attachmentId']
            attachments = service.users().messages().attachments().get(
                userId=userId, messageId=messagemeta['id'],
                id=attachment).execute()
            f = base64.urlsafe_b64decode(attachments['data'])
            toread = io.BytesIO()
            toread.write(f)
            toread.seek(0)

            dataframe = pd.read_csv(toread, header=0)

            pd.set_option('display.max_rows', 500)
            pd.set_option('display.max_columns', 500)
            pd.set_option('display.width', 1000)

            reformattedcolumns = []

            for column in dataframe.columns:
                reformattedcolumns.append(multiple_replace(regexes, column))

            dataframe.columns = reformattedcolumns

            dataframe['SubmissionCompletedDate'] = pd.to_datetime(
                dataframe['SubmissionCompletedDate'], errors='coerce').dt.date

            pandas_gbq.to_gbq(dataframe,
                              'CountryFinancial.CountryFinancial_ATS',
                              project_id='hireclix',
                              if_exists='replace',
                              table_schema=[{
                                  'name': 'SubmissionCompletedDate',
                                  'type': 'DATE'
                              }])

            dataframe.to_csv('gs://hc_countryfinincialcorp/CF:Corp_ats_' +
                             str(today) + ".csv")
            print(dataframe)
Example #25
0
def create_report(BUCKET, gcsfilename, tmpdir):
    """
    Creates report in gs://BUCKET/ based on contents in gcsfilename (gs://bucket/some/dir/filename)
    """
    # connect to BigQuery
    client = bigquery.Client()
    destination_table = client.get_table('sparktobq.kdd_cup')

    # Specify table schema. Autodetect is not a good idea for production code
    job_config = bigquery.LoadJobConfig()
    schema = [
        bigquery.SchemaField("duration", "INT64"),
    ]
    for name in ['protocol_type', 'service', 'flag']:
        schema.append(bigquery.SchemaField(name, "STRING"))
    for name in 'src_bytes,dst_bytes,wrong_fragment,urgent,hot,num_failed_logins'.split(
            ','):
        schema.append(bigquery.SchemaField(name, "INT64"))
    schema.append(bigquery.SchemaField("unused_10", "STRING"))
    schema.append(bigquery.SchemaField("num_compromised", "INT64"))
    schema.append(bigquery.SchemaField("unused_12", "STRING"))
    for name in 'su_attempted,num_root,num_file_creations'.split(','):
        schema.append(bigquery.SchemaField(name, "INT64"))
    for fieldno in range(16, 41):
        schema.append(
            bigquery.SchemaField("unused_{}".format(fieldno), "STRING"))
    schema.append(bigquery.SchemaField("label", "STRING"))
    job_config.schema = schema

    # Load CSV data into BigQuery, replacing any rows that were there before
    job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    job_config.skip_leading_rows = 0
    job_config.source_format = bigquery.SourceFormat.CSV
    load_job = client.load_table_from_uri(gcsfilename,
                                          destination_table,
                                          job_config=job_config)
    print("Starting LOAD job {} for {}".format(load_job.job_id, gcsfilename))
    load_job.result()  # Waits for table load to complete.
    print("Finished LOAD job {}".format(load_job.job_id))

    # connections by protocol
    sql = """
        SELECT COUNT(*) AS count
        FROM sparktobq.kdd_cup
        GROUP BY protocol_type
        ORDER by count ASC    
    """
    connections_by_protocol = client.query(sql).to_dataframe()
    connections_by_protocol.to_csv(
        os.path.join(tmpdir, "connections_by_protocol.csv"))
    print("Finished analyzing connections")

    # attacks plot
    sql = """
                            SELECT 
                             protocol_type, 
                             CASE label
                               WHEN 'normal.' THEN 'no attack'
                               ELSE 'attack'
                             END AS state,
                             COUNT(*) as total_freq,
                             ROUND(AVG(src_bytes), 2) as mean_src_bytes,
                             ROUND(AVG(dst_bytes), 2) as mean_dst_bytes,
                             ROUND(AVG(duration), 2) as mean_duration,
                             SUM(num_failed_logins) as total_failed_logins,
                             SUM(num_compromised) as total_compromised,
                             SUM(num_file_creations) as total_file_creations,
                             SUM(su_attempted) as total_root_attempts,
                             SUM(num_root) as total_root_acceses
                           FROM sparktobq.kdd_cup
                           GROUP BY protocol_type, state
                           ORDER BY 3 DESC
    """
    attack_stats = client.query(sql).to_dataframe()
    ax = attack_stats.plot.bar(x='protocol_type',
                               subplots=True,
                               figsize=(10, 25))
    ax[0].get_figure().savefig(os.path.join(tmpdir, 'report.png'))
    print("Finished analyzing attacks")

    bucket = gcs.Client().get_bucket(BUCKET)
    for blob in bucket.list_blobs(prefix='sparktobq/'):
        blob.delete()
    for fname in ['report.png', 'connections_by_protocol.csv']:
        bucket.blob('sparktobq/{}'.format(fname)).upload_from_filename(
            os.path.join(tmpdir, fname))
    print("Uploaded report based on {} to {}".format(gcsfilename, BUCKET))
Example #26
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--network-name', type=str, required=True)
    parser.add_argument('-d', '--dataset-name', type=str, required=True)
    parser.add_argument('-p', '--pretrained', type=bool, default=False)
    parser.add_argument('-dp', '--data-parallel', type=bool, default=True)
    parser.add_argument('--test-images-path',
                        type=str,
                        default=default_test_images)
    parser.add_argument('-l', '--load')
    parser.add_argument('--batchSz', type=int, default=32)
    parser.add_argument('--save')
    args = parser.parse_args()

    args.cuda = torch.cuda.is_available()

    print("using cuda: ", args.cuda)

    args.save = args.save or 'work/%s/%s' % \
                                (args.network_name, args.dataset_name)
    setproctitle.setproctitle('work/%s/%s-test' % \
                                (args.network_name, args.dataset_name))

    if not os.path.exists(args.save):
        raise ValueError('save directory not found')

    kwargs = {'batch_size': args.batchSz}

    testLoader = get_testloader(args, **kwargs)

    if args.load:
        print("Loading network: {}".format(args.load))
        net = torch.load(args.load)
    else:
        load_path = 'work/%s/%s' % (args.network_name, args.dataset_name)
        files = [f for f in os.listdir(load_path) if \
                            os.path.isfile(os.path.join(load_path, f)) \
                            and '.pth' in f]
        current = max([int(i.replace('.pth', '')) for i in files])
        model_path = os.path.join(load_path, str(current) + '.pth')
        net = torch.load(model_path)

    if args.cuda:
        net = net.cuda()

    now = datetime.datetime.now(
        tz=pytz.timezone("US/Mountain")).strftime("%Y-%m-%d___%H:%M:%S")
    predict_csv_path = os.path.join(
        args.save, '{}_{}_predict.csv'.format(BRANCH_NAME, now))

    try:
        os.remove(predict_csv_path)  # remove if already created
    except OSError:
        print("predict_csv_path does not exist")

    predF = open(predict_csv_path, 'a')

    predict(args, net, testLoader, predF)

    predF.close()

    if len(CLOUD_STORAGE_BUCKET) != 0:
        storage_client = storage.Client()
        bucket = storage_client.get_bucket(CLOUD_STORAGE_BUCKET)
        blob = bucket.blob(predict_csv_path)

        blob.upload_from_filename(predict_csv_path)
Example #27
0
    def __init__(self):
        import google.auth
        google.auth.default()
        from google.cloud import storage
        self._client = storage.Client()

    def __init__(self, bucket):
        self.client = storage.Client()
        self.bucket = self.client.get_bucket(bucket)
        self.local_mapping = {}
Example #29
0
import re
import os
import base64
import time
import sys
import numpy as np
import uuid
import tensorflow as tf
from flask import request, jsonify
from google.cloud import storage

model = None
BUCKET = os.environ.get('GCS_BUCKET')
storage_client = storage.Client()

labels = {'paper': 0, 'rock': 1, 'scissors': 2}

MY_MODEL = 'checkpoint-model_tl_ep60_160x160.h5'
FILENAME_TEMPLATE = '{}.jpg'
PREDICT_IMAGE_WIDTH = 160
PREDICT_IMAGE_HEIGHT = 160

if not os.path.exists('/tmp/model'):
  os.makedirs('/tmp/model')

def load_model():
  global model
  if not os.path.exists('/tmp/model/' + MY_MODEL):
      download_blob(BUCKET, MY_MODEL, '/tmp/model/' + MY_MODEL)

  path = '/tmp/model/' + MY_MODEL
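download_blob is called above but not defined in this snippet; a minimal sketch using the storage_client created above, matching the call signature seen in load_model (an assumption about the helper):

def download_blob(bucket_name, source_blob_name, destination_file_name):
    # Fetch a single object from GCS to a local file.
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)
    blob.download_to_filename(destination_file_name)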
Example #30
0
def filesupload():
    if request.method == "POST":
        file = request.files["file"]
        csp = request.form["csp"]
        filetype = request.form["filetype"]
        qcfname = request.form["qcfname"]

        if filetype == "Quiz":
            path = '/usr/src/app/quiz/'
            file.save(os.path.join(path, file.filename))
            database = pymysql.connect(user=db_user,
                                       password=db_password,
                                       host=host,
                                       db=db_name)
            cursor = database.cursor()
            # Open the workbook and define the worksheet
            bookpath = path + file.filename
            book = xlrd.open_workbook(bookpath)
            sheet = book.sheet_by_name("source")
            # Create the INSERT INTO sql query
            query = "INSERT INTO {}quiz (quizname,question,option1,option2,option3,option4,correct) VALUES (%s, %s, %s, %s, %s, %s, %s)".format(
                csp)
            # Create a For loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
            for r in range(1, sheet.nrows):
                question = sheet.cell(r, 0).value
                option1 = sheet.cell(r, 1).value
                option2 = sheet.cell(r, 2).value
                option3 = sheet.cell(r, 3).value
                option4 = sheet.cell(r, 4).value
                correct = sheet.cell(r, 5).value
                values = (qcfname, question, option1, option2, option3,
                          option4, correct)
                # Execute sql Query
                cursor.execute(query, values)
                database.commit()
            # Close the cursor
            cursor.close()
            database.close()
            return render_template("adminpage.html")
        elif filetype == "Flash Cards":
            path = '/usr/src/app/cards/'
            file.save(os.path.join(path, file.filename))
            database = pymysql.connect(user=db_user,
                                       password=db_password,
                                       host=host,
                                       db=db_name)
            cursor = database.cursor()
            # Open the workbook and define the worksheet
            bookpath = path + file.filename
            book = xlrd.open_workbook(bookpath)
            sheet = book.sheet_by_name("source")
            # Create the INSERT INTO sql query
            query = "INSERT INTO {}cards (question, answer) VALUES (%s, %s)".format(
                csp)
            # Create a for loop to iterate through each row in the XLS file, starting at row 2 to skip the headers
            for r in range(1, sheet.nrows):
                question = sheet.cell(r, 0).value
                answer = sheet.cell(r, 1).value
                values = (question, answer)
                # Execute sql Query
                cursor.execute(query, values)
                database.commit()
            cursor.close()
            database.close()
            return render_template("adminpage.html")
        else:
            #filetype == "Course Content"
            path = '/usr/src/app/pdfs/'
            file.save(os.path.join(path, file.filename))
            database = pymysql.connect(user=db_user,
                                       password=db_password,
                                       host=host,
                                       db=db_name)
            cursor = database.cursor()
            query = "INSERT INTO content (csp, contentname, filelink) VALUES (%s, %s,%s)"
            values = (csp, qcfname, file.filename)
            cursor.execute(query, values)
            database.commit()
            cursor.close()
            database.close()

            destination_blob_name = "content/" + file.filename
            bucket_name = "certdetsimage"
            source_file_name = path + file.filename
            storage_client = storage.Client()
            bucket = storage_client.bucket(bucket_name)
            blob = bucket.blob(destination_blob_name)
            blob.upload_from_filename(source_file_name)
            return render_template("adminpage.html")
    else:
        # Not a POST request; fall through and render the admin page
        pass
    return render_template("adminpage.html")