Ejemplo n.º 1
0
def doSelectShowPayload(bucketName, objectName, hostName, selectExpression):
    """Run an S3 Select query and echo every raw event plus its decoded content.

    bucketName / objectName identify the object to query, hostName is the key
    used to look up the endpoint URL and credentials via mc.getMinioHostInfo(),
    and selectExpression is the SQL text passed to select_object_content.
    Nothing is returned; all output goes to stdout.
    """
    # Look up connection settings for the host (URL, then access key, then
    # secret key) and build the client from them.
    url = mc.getMinioHostInfo().getURL(hostName)
    accessKey = mc.getMinioHostInfo().getAccessKey(hostName)
    secretKey = mc.getMinioHostInfo().getSecretKey(hostName)
    client = boto3.client(
        's3',
        endpoint_url=url,
        aws_access_key_id=accessKey,
        aws_secret_access_key=secretKey,
        region_name='us-east-1',
    )

    # TODO: assumes the dataset is an uncompressed CSV with a header row...
    # should relax this.
    inputFormat = {
        'CSV': {"FileHeaderInfo": "USE"},
        'CompressionType': 'NONE',
    }
    outputFormat = {'CSV': {}}

    # select_object_content hands back a response whose 'Payload' is a stream
    # of events.
    response = client.select_object_content(
        Bucket=bucketName,
        Key=objectName,
        ExpressionType='SQL',
        Expression=selectExpression,
        InputSerialization=inputFormat,
        OutputSerialization=outputFormat,
    )

    for evt in response['Payload']:
        # Debugging output: dumps the raw event, which interleaves with (and
        # clutters) the decoded records printed below.
        print(evt)

        if 'Records' in evt:
            print(evt['Records']['Payload'].decode('utf-8'), end="")
            continue

        if 'Stats' not in evt:
            continue

        details = evt['Stats']['Details']
        scanned = details['BytesScanned']
        processed = details['BytesProcessed']

        print("Stats details bytesScanned: ", scanned)
        print("Stats details bytesProcessed: ", processed)
Ejemplo n.º 2
0
def getColumntHeaders(bucketName, objectName, hostName, delim=","):
    """Return the column names found on the first line of a delimited object.

    Args:
        bucketName: bucket that holds the object.
        objectName: key of the object whose header line is read.
        hostName: key used to look up the endpoint URL and credentials via
            mc.getMinioHostInfo().
        delim: separator used to split the header line (default ",").

    Returns:
        A list of column-name strings. The line terminator is not part of the
        last name. An empty object yields [""].
    """
    endpoint = mc.getMinioHostInfo().getURL(hostName)
    secureFlag = ("https://" in endpoint)
    print(secureFlag)

    # Set up the resource; URL, accessKey and secretKey come from
    # mc.getMinioHostInfo().
    # NOTE(review): verify=False disables TLS certificate verification.
    s3 = boto3.resource('s3',
                        endpoint_url=endpoint,
                        aws_access_key_id=mc.getMinioHostInfo().getAccessKey(hostName),
                        aws_secret_access_key=mc.getMinioHostInfo().getSecretKey(hostName),
                        verify=False
                        # is_secure=secureFlag
                        )

    # Open the object and take only its first line. iter_lines() is the public
    # streaming API and drops the trailing "\n" / "\r\n", so the last column
    # name no longer carries the line terminator.
    body = s3.Object(bucketName, objectName).get()['Body']
    try:
        firstLine = next(body.iter_lines(), b"")
    finally:
        # Release the underlying HTTP connection; the rest of the object is
        # not needed.
        body.close()

    # Decode as UTF-8 and split into the list of column header names.
    return firstLine.decode('utf-8').split(delim)
Ejemplo n.º 3
0
def iterateThroughTests(whichHosts):
    """Run every test select expression against every dataset on the chosen hosts.

    For each entry of TestSelectExpressions the expression is printed, then
    each TestDatasets entry whose 'host' is in whichHosts is queried through
    doSelect. Hosts for which mc.getMinioHostInfo().getAlias() returns an empty
    string are reported as errors instead of being queried. The module-level
    `quiet` flag is forwarded to doSelect.
    """
    # optionally override quiet..
    # quiet = True

    for expression in TestSelectExpressions:
        printSelectExpression(expression)

        for dataset in TestDatasets:
            host = dataset['host']
            if host not in whichHosts:
                continue

            # An empty alias means the host is not configured.
            if mc.getMinioHostInfo().getAlias(host) == "":
                print("ERROR: No host matching '", host, "' found is configured.", sep="")
                continue

            print(">>> Querrying '", host, "'...", sep="")
            doSelect(dataset['bucket'], dataset['object'], host, expression, quiet)
    print()
def showHostInfo(hostName):
    """Print the configuration values that mc.getMinioHostInfo() holds for a host.

    When the alias lookup for hostName returns an empty string, a "no host
    matching" notice is printed instead. A blank line is printed at the end in
    both cases.
    """
    print('Host config information for ', hostName, sep="")

    if mc.getMinioHostInfo().getAlias(hostName) == "":
        print("-- No host matching '", hostName, "' found is configured.", sep="")
        print()
        return

    # (label, getter name) pairs, in the order they are printed.
    # NOTE(review): this prints the secret key in clear text.
    rows = (
        ("   status:", "getStatus"),
        ("   alias:", "getAlias"),
        ("   URL: ", "getURL"),
        ("   accessKey:", "getAccessKey"),
        ("   secretKey:", "getSecretKey"),
        ("   api: ", "getAPI"),
    )
    for label, getterName in rows:
        print(label, getattr(mc.getMinioHostInfo(), getterName)(hostName))
    print()
Ejemplo n.º 5
0
    doSelectShowPayload("sjm-airlines", "DelayedFlights.csv", h, s)
    print()


#main
if __name__ == "__main__":
    # Script entry point: sets the module-level `quiet` flag and `metrics`
    # list, then runs whichever of the hand-toggled sections below is enabled.

    # Suppress printing of extra information (e.g. quiet suppresses printing
    # the data returned by the select query against the data set).
    quiet = True

    # Starts empty; handed to processMetrics() in the section below.
    metrics = list()

    # Create dictionary of information about hosts configured in the minio
    # client (mc) config file. The dictionary will contain all info (URL,
    # accessKey, secretKey, etc.).
    # NOTE(review): hostDict is not referenced again in the visible code.
    hostDict = mc.getMinioHostInfo()

    # True to test individual select calls, False to skip.
    if False:
        testIndividualSelectCalls()

    # True to test all the select statements against all the hosts, False to
    # skip.
    if False:
        whichHosts = ['s3', 'play', 'm0', 'z0']
        iterateThroughTests(whichHosts)

        showGraphs = False
        processMetrics(metrics, quiet, showGraphs)

    # True to run showHarshaPayloadBug(), False to skip.
    if True:
        showHarshaPayloadBug()
Ejemplo n.º 6
0
def doSelect(bucketName, objectName, hostName, selectExpression, quiet):
    """Run an S3 Select query, optionally echo the result, and record metrics.

    Args:
        bucketName: bucket that holds the object to query.
        objectName: key of the object to query.
        hostName: key used to look up the endpoint URL and credentials via
            mc.getMinioHostInfo().
        selectExpression: SQL text passed to select_object_content.
        quiet: when true, records and stats are not echoed and a single
            "DONE" line is printed instead.

    Side effects:
        Appends one dict describing the call (expression, host, bucket,
        object, elapsed time, byte counters) to the module-level `metrics`
        list. 'bytesScanned' / 'bytesProcessed' are None when the response
        stream carried no Stats event.
    """
    startTime = datetime.datetime.now()

    endpoint = mc.getMinioHostInfo().getURL(hostName)
    secureFlag = ("https://" in endpoint)
    print(secureFlag)

    # Set up the client; URL, accessKey and secretKey come from
    # mc.getMinioHostInfo().
    # NOTE(review): verify=False disables TLS certificate verification.
    s3 = boto3.client('s3',
                      endpoint_url=endpoint,
                      aws_access_key_id=mc.getMinioHostInfo().getAccessKey(hostName),
                      aws_secret_access_key=mc.getMinioHostInfo().getSecretKey(hostName),
                      #is_secure=secureFlag,
                      verify=False,
                      region_name='us-east-1')

    # Make the select_object_content call... returns a stream.
    # TODO: assumes dataset is CSV and is not compressed... should relax this
    eventStream = s3.select_object_content(
        Bucket=bucketName,
        Key=objectName,
        ExpressionType='SQL',
        Expression=selectExpression,
        InputSerialization={
            'CSV': {
                "FileHeaderInfo": "USE",
            },
            'CompressionType': 'NONE',
        },
        OutputSerialization={'CSV': {}},
    )

    # Bind the counters up front: they are only assigned when a Stats event
    # arrives, and the metrics record below must not fail with
    # UnboundLocalError when the stream contains none.
    bs = None
    bp = None

    # Iterate through the response (eventStream).
    for event in eventStream['Payload']:
        if 'Records' in event:
            record = event['Records']['Payload'].decode('utf-8')
            if not quiet:
                print(record, end="")
        elif 'Stats' in event:
            statsDetails = event['Stats']['Details']
            bs = statsDetails['BytesScanned']
            bp = statsDetails['BytesProcessed']
            if not quiet:
                print("Stats details bytesScanned: ", bs)
                print("Stats details bytesProcessed: ", bp)

    if quiet:
        print("**DONE - (Output not echoed!)**")

    endTime = datetime.datetime.now()
    # printElapsedTime's return value is used as a timedelta here, as the
    # original code did — TODO confirm against its definition.
    elapsedTime = printElapsedTime(startTime, endTime, quiet)
    elapsedTimeSecs = elapsedTime.total_seconds()

    metrics.append({"expression": selectExpression,
                    'host': hostName,
                    'bucket': bucketName,
                    'object': objectName,
                    'elapsedTimeDays': elapsedTime,
                    'elapsedTimeSecs': elapsedTimeSecs,
                    'bytesScanned': bs,
                    'bytesProcessed': bp,
                    })
Ejemplo n.º 7
0
def printHostInfo(hostName):
    """Print one line naming the host and the URL mc.getMinioHostInfo() gives for it."""
    url = mc.getMinioHostInfo().getURL(hostName)
    print("Host '{}' ({})".format(hostName, url))
Ejemplo n.º 8
0
    doSelectShowPayload( "sjm-airlines", "DelayedFlights.csv", h, s)
    print()
    
        
#main    
if __name__ == "__main__" :
    # Script entry point: sets the module-level `quiet` flag and `metrics`
    # list, then runs whichever of the hand-toggled sections below is enabled.
    #os.environ['SSL_CERT_FILE'] = 'prgx_ca.pem'
    
    # Suppress printing of extra information (e.g. quiet suppresses printing
    # the data returned by the select query against the data set).
    quiet = True
    
    # Starts empty; only referenced by the commented-out processMetrics()
    # call in the visible part of this block.
    metrics = list()
    
    # Initialize the getMinioHostInfo() class (the return value is discarded).
    mc.getMinioHostInfo()
    
    
    # True to test individual select calls, False to skip.
    if True:
        testIndividualSelectCalls()
        
        #showGraphs = True
        #processMetrics( metrics, quiet, showGraphs)
        
    # True to test all the select statements against all the hosts, False to
    # skip.
    if False:
        whichHosts = [
                'c1'
                ]
        iterateThroughTests(whichHosts)