def vision_client():
    """Create vision client."""
    return vision.ImageAnnotatorClient()
Example #2
def extract_words(img, height, width, ocr_engine='pytesseract'):
    if ocr_engine == 'pytesseract':
        data = pytesseract.image_to_data(img, output_type=Output.DICT)
        n_boxes = len(data['text'])
        words = [
            {
                'text': data['text'][i],
                'left': data['left'][i],
                'top': data['top'][i],
                'right': data['left'][i] + data['width'][i],
                'bottom': data['top'][i] + data['height'][i]
            }
            for i in range(n_boxes) if data['text'][i]
        ]
        return words

    
    elif ocr_engine == 'google_ocr':
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='PNG')
        img_byte_arr = img_byte_arr.getvalue()
        client = vision.ImageAnnotatorClient()
        image_ = vision.Image(content=img_byte_arr)
        response = client.text_detection(image=image_)
        texts = response.text_annotations

        words = []
        # skip the first annotation, which spans the full text of the image
        for text in texts[1:]:
            data = {'text': text.description}
            x_vert = [vertex.x for vertex in text.bounding_poly.vertices]
            y_vert = [vertex.y for vertex in text.bounding_poly.vertices]
            data['left'] = min(x_vert)
            data['right'] = max(x_vert)
            data['top'] = min(y_vert)
            data['bottom'] = max(y_vert)
            words.append(data)
        return words
    
    
    elif ocr_engine == 'aws_textract':

        import boto3

        # use aws textract
        client = boto3.client('textract')

        # convert PpmImageFile to byte
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='PNG')
        img_byte_arr = img_byte_arr.getvalue()

        # call aws-textract API
        response = client.detect_document_text(Document={'Bytes': img_byte_arr})

        # use the image width and height to convert normalized coordinates from the response
        words = [
            {
                'text': data['Text'],
                'left': math.floor((data['Geometry']['BoundingBox']['Left']) * width),
                'top': math.floor((data['Geometry']['BoundingBox']['Top']) * height),
                'right': math.ceil(
                    (data['Geometry']['BoundingBox']['Left'] + data['Geometry']['BoundingBox']['Width']) * width),
                'bottom': math.ceil(
                    (data['Geometry']['BoundingBox']['Top'] + data['Geometry']['BoundingBox']['Height']) * height)
            } for data in response['Blocks'] if "Text" in data
        ]
        return words
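
A minimal usage sketch for extract_words above (the image path is an illustrative assumption): note that height and width are only consulted by the 'aws_textract' branch, which must de-normalize Textract's 0-1 coordinates.

from PIL import Image

img = Image.open('sample_page.png')  # hypothetical input image
width, height = img.size
words = extract_words(img, height, width, ocr_engine='pytesseract')
for w in words[:5]:
    print(w['text'], (w['left'], w['top'], w['right'], w['bottom']))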
Example #3
def lambda_handler(event, context):
    client = vision.ImageAnnotatorClient()
    print(client)
Example #4
def p2a_ocr_pdf(bucket, pdf_blob):
    """
    https://cloud.google.com/vision/docs/pdf
    """

    # define the input config
    gcs_source_uri = "gs://{}/{}".format(bucket.name, pdf_blob.name)
    gcs_source = vision.GcsSource(uri=gcs_source_uri)

    # Supported mime_types are: 'application/pdf' and 'image/tiff'
    mime_type = 'application/pdf'

    # How many pages should be grouped into each json output file.
    batch_size = 2

    client = vision.ImageAnnotatorClient()

    feature = vision.Feature(type_=vision.Feature.Type.DOCUMENT_TEXT_DETECTION)

    gcs_source = vision.GcsSource(uri=gcs_source_uri)
    input_config = vision.InputConfig(gcs_source=gcs_source,
                                      mime_type=mime_type)

    # define output config
    pdf_id = pdf_blob.name.replace(".pdf",
                                   "")[:4]  # use the first 4 chars as pdf_id
    gcs_destination_uri = "gs://{}/{}".format(bucket.name, pdf_id + "_")

    gcs_destination = vision.GcsDestination(uri=gcs_destination_uri)
    output_config = vision.OutputConfig(gcs_destination=gcs_destination,
                                        batch_size=batch_size)

    async_request = vision.AsyncAnnotateFileRequest(
        features=[feature],
        input_config=input_config,
        output_config=output_config)

    operation = client.async_batch_annotate_files(requests=[async_request])

    print('Waiting for the operation to finish.')
    operation.result(timeout=420)

    # Once the request has completed and the output has been
    # written to GCS, we can list all the output files.
    # storage_client = storage.Client()

    # match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
    # bucket_name = match.group(1)
    # prefix = match.group(2)

    # bucket = storage_client.get_bucket(bucket_name)

    # # List objects with the given prefix.
    # blob_list = list(bucket.list_blobs(prefix=prefix))
    # print('Output files:')
    # for blob in blob_list:
    #     print(blob.name)

    # # Process the first output file from GCS.
    # # Since we specified batch_size=2, the first response contains
    # # the first two pages of the input file.
    # output = blob_list[0]

    # json_string = output.download_as_string()
    # response = json.loads(json_string)

    # # The actual response for the first page of the input file.
    # first_page_response = response['responses'][0]
    # annotation = first_page_response['fullTextAnnotation']

    # # Here we print the full text from the first page.
    # # The response contains more information:
    # # annotation/pages/blocks/paragraphs/words/symbols
    # # including confidence scores and bounding boxes
    # print('Full text:\n')
    # print(annotation['text'])

    # convert PDF to PNG files for annotation
    if ANNOTATION_MODE:
        convert_pdf2png(bucket, pdf_blob)
Example #5
 def __init__(self):
     self.img = "mirror.jpg"
     self.client = vision.ImageAnnotatorClient()
Example #6
def crop_flooded_objects_boundary(google_api_key, file_list):

    credentials = service_account.Credentials.from_service_account_file(
        google_api_key)

    object_dict = {}
    cropped_images = []

    # creating a tenth and cycle counter to output progress of function
    tenth_counter = 0
    cycle_counter = 1

    #looping though each image in the list submitted to the function
    for file_item in file_list:

        #need to have google vision credentials saved to credentials
        client = vision.ImageAnnotatorClient(credentials=credentials)

        # path to the images that need to be cropped
        with open(file_item, 'rb') as image_file:
            content = image_file.read()
        image = vision.types.Image(content=content)

        #same path just using OpenCV to get image shape and will use to save the cropped images later
        im_cv2 = cv2.imread(file_item)
        height, width, color = im_cv2.shape

        #Using Google vision to actually find objects in the image
        objects = client.object_localization(
            image=image).localized_object_annotations

        tenth_counter += 1

        file_crops = []
        #looping through each of the objects Google vision found in the image
        for object_ in objects:
            # ignoring all objects that don't have to do with the cars in the image
            if object_.name in objects_to_crop_around:
                vertex_dict = {}

                #need to make sure the normalized vertex are multipled by the corresponding image distance so the vertex are in pixels counts
                for index, vertex in enumerate(
                        object_.bounding_poly.normalized_vertices):
                    vertex_dict[f'vertex_{index}'] = [
                        int(width * vertex.x),
                        int(height * vertex.y)
                    ]
                object_dict[object_.name] = vertex_dict

                # Cropping the image around the vertices of the object

                # https://www.life2coding.com/cropping-polygon-or-non-rectangular-region-from-image-using-opencv-python/
                # https://stackoverflow.com/questions/48301186/cropping-concave-polygon-from-image-using-opencv-python

                mask = np.zeros(im_cv2.shape[:2], np.uint8)
                points = np.array([
                    object_dict[object_.name]['vertex_0'],
                    object_dict[object_.name]['vertex_1'],
                    object_dict[object_.name]['vertex_2'],
                    object_dict[object_.name]['vertex_3']
                ])

                #creating the bounding rectangle from the object vertices
                rect = cv2.boundingRect(points)
                x, y, w, h = rect

                # cropping the image using OpenCV and the dimentions of the bounding rectangle
                cropped = im_cv2[y:y + h, x:x + w].copy()

                file_crops.append(
                    Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB)))

        cropped_images.append(file_crops)

    return cropped_images
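
A hedged usage sketch for crop_flooded_objects_boundary, assuming a service-account key path, a list of local image files, and a module-level objects_to_crop_around list already defined (all names illustrative):

crops = crop_flooded_objects_boundary('vision-key.json', ['flood_scene.jpg'])
for file_idx, file_crops in enumerate(crops):
    for crop_idx, crop in enumerate(file_crops):
        crop.save('crop_{}_{}.png'.format(file_idx, crop_idx))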
Example #7
def hello_world(request):

    from google.cloud import vision
    from datetime import datetime
    import re
    import itertools
    import write2bq
    #from google.oauth2 import service_account
    import os
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"C:\gcp_credentials\elaborate-howl-285701-105c2e8355a8.json"
    #SCOPES = ['https://www.googleapis.com/auth/sqlservice.admin']
    #SERVICE_ACCOUNT_FILE = 'C:\gcp_credentials\elaborate-howl-285701-105c2e8355a8.json'
    #credentials = service_account.Credentials.from_service_account_file(
    #    SERVICE_ACCOUNT_FILE, scopes=SCOPES)
    table_id = 'elaborate-howl-285701.context.image_web_entities'  # destination table name

    now = str(datetime.now())  # timestamp

    print("now=" + now)

    client = vision.ImageAnnotatorClient()
    request_json = request.get_json()
    image = vision.Image()
    if request_json:
        source_url = request_json['source_url']
        print("source_url="+source_url)

    source_url = re.match(r'gs://([^/]+)/(.+)', source_url)
    bucket_name = source_url.group(1)  # bucket name
    print(bucket_name)
    prefix = source_url.group(2)  # object prefix
    print(prefix)

    file_name = prefix
    exact_file_name_list = re.split("/", file_name)
    exact_file_name = exact_file_name_list[-1]

    uri = "gs://" + bucket_name + "/" + file_name
    print("uri=" + uri)

    image.source.image_uri = uri

    response = client.web_detection(image=image)
    matching_images_lst = []
    # collect URLs into a plain list; the raw response object is not JSON-serializable
    matching_images = response.web_detection.full_matching_images
    for matching_image in matching_images:
        matching_images_lst.append(matching_image.url)
    page_lst = []
    for page in response.web_detection.pages_with_matching_images:
        page_lst.append(page.url)

    best_match_lst = []  # stores the best-guess labels
    for best_match in response.web_detection.best_guess_labels:
        best_match_lst.append(best_match.label)

    for (a, b, c) in itertools.zip_longest(matching_images_lst, page_lst, best_match_lst):
        documentEntities = {
            "time_stamp": now,
            "file_name": exact_file_name,
            "matching_images": a,
            "pages_with_images": b,
            "best_guess": c,
            "input_uri": uri
        }
        write2bq.BQ(documentEntities, table_id)

    return "success"
Example #8
import io
import os
# Imports the Google Cloud client library
from google.cloud import vision
from google.oauth2 import service_account

# Instantiates a client from a service-account key file (change the path below)
credentials = service_account.Credentials.from_service_account_file(
    'My Project-6748fa243896.json')
vision_client = vision.ImageAnnotatorClient(credentials=credentials)

# The name of the image file to annotate (change 'locol1.png' to your image path)
file_name = os.path.join(os.path.dirname(__file__), 'locol1.png')

# Loads the image into memory
with io.open(file_name, 'rb') as image_file:
    content = image_file.read()
image = vision.Image(content=content)

# Performs label detection on the image file
response = vision_client.label_detection(image=image)
labels = response.label_annotations

print('Labels:')
for label in labels:
    print(label.description)
Example #9
def imagecheck():
    ans = ""
    ans1 = "You are dressed perfectly for the temperature"
    warmth = 0
    wet = 0
    api_result = requests.get('http://api.weatherstack.com/current', params)
    api_response = api_result.json()
    temp = api_response['current']['temperature']
    precip_rain = api_response['current']['precip']
    precip_snow = 0
    wind = api_response['current']['wind_speed']
    weatherCode = api_response['current']['weather_code']
    # less than 113 is sunny
    if (weatherCode <= 113):
        weather = "sunny"
    # between 113 and 143 is cloudy
    elif (weatherCode <= 143):
        weather = "cloudy"
    # between 143 and 230 is snowing
    elif (weatherCode < 230):
        weather = "snow"
    # between 230 and 326
    elif (weatherCode < 326):
        weather = "rain"
    # everything above 326 is snow again
    else:
        weather = "snow"
    # set the precipitation to either snow or rain
    # default set to rain, but if its snowing then swap
    if (weather == "snow"):
        precip_snow = precip_rain
        precip_rain = 0
    import csv
    csv_columns = [
        'Wind Speed', 'average temperature', 'precipitation', 'snow fall'
    ]
    dict_data = [{
        'Wind Speed': wind,
        'average temperature': temp,
        'precipitation': precip_rain,
        'snow fall': precip_snow
    }]
    csv_file = "Names.csv"
    try:
        with open(csv_file, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            for data in dict_data:
                writer.writerow(data)
    except IOError:
        print("I/O error")
    data = pd.read_csv("Names.csv")
    model = pickle.load(open('model.sav', 'rb'))
    output = model.predict(data)
    out = output.tolist()
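    # assumption: the model's single output encodes wetness in the tens digit
    # and warmth in the ones digit, which the two lines below decode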
    wetp = int((out[0] - out[0] % 10) / 10)
    warmp = (out[0] % 10)
    if request.method == 'POST':
        file = request.files['pic']
        file.filename = "image.jpg"
        file_name = file.filename
        file.save(file_name)
        with open('./image.jpg', 'rb') as image_file:
            content = image_file.read()
        client = vision.ImageAnnotatorClient()
        response = client.label_detection({'content': content})
        labels = response.label_annotations
        print('Labels:')
        for label in labels[0:8]:
            print(label.description)
            if (label.description == "Jacket" or label.description == "Coat"):
                warmth = warmth + 7
            if (label.description == "Sweater"
                    or label.description == "Hoodie"):
                warmth = warmth + 5
            if (label.description == "Hat"):
                warmth += 1
                wet = 1
            if (label.description == "gloves"):
                wet = 3
            if (label.description == "Boots"):
                wet = 4
                warmth += 1
            if (label.description == "Hood" or label.description == "Hoodie"):
                wet = 1
            if (label.description == "Umbrella"):
                wet = 2

        if (wetp > 2 and wet < 3):
            ans = "also you might want to be careful of snow"
        elif (wetp > 0 and wet < 1):
            ans = "also you might want to grab an umbrella"
        elif (wetp == 0 and wet > 0 and wet < 3):
            ans = "also you probably won't need that umbrella"
        if ((warmp - warmth) < -1):
            ans1 = "You may be overdressed for the weather"
        elif ((warmp - warmth) > 1):
            ans1 = "You may be underdressed for the weather"

    out = ans1 + " " + ans
    return out
Example #10
def upload_photo():
    photo = request.files["file"]

    # Create a Cloud Storage client.
    storage_client = storage.Client()

    # Get the bucket that the file will be uploaded to.
    bucket = storage_client.get_bucket(CLOUD_STORAGE_BUCKET)

    # Create a new blob and upload the file's content.
    blob = bucket.blob(photo.filename)
    blob.upload_from_string(photo.read(), content_type=photo.content_type)

    # Make the blob publicly viewable.
    blob.make_public()

    # Create a Cloud Vision client.
    vision_client = vision.ImageAnnotatorClient()

    # Use the Cloud Vision client to detect a face for our image.
    source_uri = "gs://{}/{}".format(CLOUD_STORAGE_BUCKET, blob.name)
    image = vision.Image(source=vision.ImageSource(gcs_image_uri=source_uri))
    faces = vision_client.face_detection(image=image).face_annotations

    # If a face is detected, save to Datastore the likelihood that the face
    # displays 'joy,' as determined by Google's Machine Learning algorithm.
    if len(faces) > 0:
        face = faces[0]

        # Convert the likelihood string.
        likelihoods = [
            "Unknown",
            "Very Unlikely",
            "Unlikely",
            "Possible",
            "Likely",
            "Very Likely",
        ]
        face_joy = likelihoods[face.joy_likelihood]
    else:
        face_joy = "Unknown"

    # Create a Cloud Datastore client.
    datastore_client = datastore.Client()

    # Fetch the current date / time.
    current_datetime = datetime.now()

    # The kind for the new entity.
    kind = "Faces"

    # The name/ID for the new entity.
    name = blob.name

    # Create the Cloud Datastore key for the new entity.
    key = datastore_client.key(kind, name)

    # Construct the new entity using the key. Set dictionary values for entity
    # keys blob_name, storage_public_url, timestamp, and joy.
    entity = datastore.Entity(key)
    entity["blob_name"] = blob.name
    entity["image_public_url"] = blob.public_url
    entity["timestamp"] = current_datetime
    entity["joy"] = face_joy

    # Save the new entity to Datastore.
    datastore_client.put(entity)

    # Redirect to the home page.
    return redirect("/")
Example #11
def detect_web(path):
    """Detects web annotations given an image."""
    from google.cloud import vision

    assert os.path.exists(path) and os.path.isfile(path)
    client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.types.Image(content=content)

    response = client.web_detection(image=image)
    annotations = response.web_detection

    if annotations.best_guess_labels:
        for label in annotations.best_guess_labels:
            print('\nBest guess label: {}'.format(label.label))

    if annotations.pages_with_matching_images:
        print('\n{} Pages with matching images found:'.format(
            len(annotations.pages_with_matching_images)))

        # for page in annotations.pages_with_matching_images:
        #     print('\n\tPage url   : {}'.format(page.url))

        #     if page.full_matching_images:
        #         print('\t{} Full Matches found: '.format(
        #                len(page.full_matching_images)))

        #         for image in page.full_matching_images:
        #             print('\t\tImage url  : {}'.format(image.url))

        #     if page.partial_matching_images:
        #         print('\t{} Partial Matches found: '.format(
        #                len(page.partial_matching_images)))

        #         for image in page.partial_matching_images:
        #             print('\t\tImage url  : {}'.format(image.url))

    # if annotations.web_entities:
    #     print('\n{} Web entities found: '.format(
    #         len(annotations.web_entities)))

    #     for entity in annotations.web_entities:
    #         print('\n\tScore      : {}'.format(entity.score))
    #         print(u'\tDescription: {}'.format(entity.description))

    # if annotations.visually_similar_images:
    #     print('\n{} visually similar images found:\n'.format(
    #         len(annotations.visually_similar_images)))

    #     for image in annotations.visually_similar_images:
    #         print('\tImage url    : {}'.format(image.url))

    if response.error.message:
        raise Exception('{}\nFor more info on error messages, check: '
                        'https://cloud.google.com/apis/design/errors'.format(
                            response.error.message))

    return annotations
Example #12
def call_api():
    # authorize twitter, initialize tweepy
    screen_name = "NBA"
    label_output = "label.json"
    Image_Number = 4
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # Get the tweets from a user up to 200
    tweets = api.user_timeline(screen_name=screen_name,
                               count=200,
                               include_rts=False,
                               exclude_replies=True)

    last_id = tweets[-1].id

    while (True):
        more_tweets = api.user_timeline(screen_name=screen_name,
                                        count=200,
                                        include_rts=False,
                                        exclude_replies=True,
                                        max_id=last_id - 1)
        # If there are no more tweets
        if (len(more_tweets) == 0):
            break
        else:
            last_id = more_tweets[-1].id - 1
            tweets = tweets + more_tweets

    # Obtain the full path for the images
    media_files = set()
    for status in tweets:
        media = status.entities.get('media', [])
        if (len(media) > 0):
            media_files.add(media[0]['media_url'])

    count = 0

    for media_file in media_files:
        #print(media_file)
        wget.download(media_file, 'image' + str(count) + media_file[-4:])

        count += 1
        if count == Image_Number:
            break

    # Instantiates a client, specify the cert file
    os.environ[
        "GOOGLE_APPLICATION_CREDENTIALS"] = "/usr/local/lib/googlecloudsdk/EC500hw1-70f8d0b192a0.json"
    client = vision.ImageAnnotatorClient()

    data = {}
    data['Pictures'] = []
    data['Account'] = screen_name
    pictures = [pic for pic in os.listdir(".") if pic.endswith('jpg')]
    picNum = 0
    order = 0
    for i in pictures:

        file = os.path.join(os.path.dirname(__file__), i)
        new = str(picNum) + '.jpg'
        os.renames(file, new)
        Subject = {}
        picNum = picNum + 1

        # Loads the image into memory
        with io.open(new, 'rb') as image_file:
            content = image_file.read()

        image = types.Image(content=content)
        response = client.label_detection(image=image)
        labels = response.label_annotations

        label_list = []
        for label in labels:
            label_list.append(label.description)

        Subject[str(order)] = label_list
        data['Pictures'].append(Subject)

        order = order + 1

    #Create json file
    with open(label_output, 'w') as JSONObject:
        json.dump(data, JSONObject, indent=4, sort_keys=True)
    #Initialize MongoDB
    client = MongoClient()
    db = client.picture.database
    collection = db.picture_collection
    db.posts.insert_one(data)

    pprint.pprint(db.posts.find_one({'Account': screen_name}))

    os.system(
        "cat *.jpg | ffmpeg -f image2pipe -framerate .5 -i - -vf 'crop=in_w-1:in_h' -vcodec libx264 video.mp4"
    )
Example #13
def async_detect_document(gcs_source_uri, gcs_destination_uri):
    """OCR with PDF/TIFF as source files on GCS"""
    import re
    from google.cloud import vision
    from google.cloud import storage
    from google.protobuf import json_format
    # Supported mime_types are: 'application/pdf' and 'image/tiff'
    mime_type = 'application/pdf'

    # How many pages should be grouped into each json output file.
    batch_size = 2

    client = vision.ImageAnnotatorClient()

    feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)

    gcs_source = vision.types.GcsSource(uri=gcs_source_uri)
    input_config = vision.types.InputConfig(gcs_source=gcs_source,
                                            mime_type=mime_type)

    gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri)
    output_config = vision.types.OutputConfig(gcs_destination=gcs_destination,
                                              batch_size=batch_size)

    async_request = vision.types.AsyncAnnotateFileRequest(
        features=[feature],
        input_config=input_config,
        output_config=output_config)

    operation = client.async_batch_annotate_files(requests=[async_request])

    print('Waiting for the operation to finish.')
    operation.result(timeout=180)

    # Once the request has completed and the output has been
    # written to GCS, we can list all the output files.
    storage_client = storage.Client()

    match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
    bucket_name = match.group(1)
    prefix = match.group(2)

    bucket = storage_client.get_bucket(bucket_name)

    # List objects with the given prefix.
    blob_list = list(bucket.list_blobs(prefix=prefix))
    print('Output files:')
    for blob in blob_list:
        print(blob.name)

    # Process the first output file from GCS.
    # Since we specified batch_size=2, the first response contains
    # the first two pages of the input file.
    output = blob_list[0]

    json_string = output.download_as_string()
    response = json_format.Parse(json_string,
                                 vision.types.AnnotateFileResponse())

    # The actual response for the first page of the input file.
    first_page_response = response.responses[0]
    annotation = first_page_response.full_text_annotation

    # Here we print the full text from the first page.
    # The response contains more information:
    # annotation/pages/blocks/paragraphs/words/symbols
    # including confidence scores and bounding boxes
    print(u'Full text:\n{}'.format(annotation.text))
Example #14
# Imports the Google Cloud client library
from google.cloud import vision
from google.cloud.vision import types

from utilities.digicon_classes import coordinate, boundingBox, image_location

class FeatureType(Enum):
    PAGE = 1
    BLOCK = 2
    PARA = 3
    WORD = 4
    SYMBOL = 5

# Instantiates a client
CLIENT = vision.ImageAnnotatorClient()
# google_sample.pkl stores the returned types.AnnotateImageResponse object for frontend/src/samples/1.jpg
# Saves us unnecessary calls to the API while testing
# with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'google_sample.pkl'), 'rb') as input:
#     SAMPLE_RESPONSE = pickle.load(input)

def parse_google_ocr(ocr_response):
    # Collect specified feature bounds by enumerating all document features
    sentence_bounds = []
    word_bounds = []
    all_text = ''
    temp_coordinate = coordinate(0, 0)
    for page in ocr_response.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                paragraph_bounds = []
Example #15
def get_all_tweets(screen_name):

    #Twitter only allows access to a user's most recent 3240 tweets with this method
    
    #authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)
    
    #initialize a list to hold all the tweepy Tweets
    alltweets = []    
    
    #make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name = screen_name,count=10)
    
    #save most recent tweets
    alltweets.extend(new_tweets)
    
    #save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1
    
    #keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:
        
        #all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name = screen_name,count=10,max_id=oldest)
        
        #save most recent tweets
        alltweets.extend(new_tweets)
        
        #update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1
        if(len(alltweets) > 15):
            break
        print ("...%s tweets downloaded so far" % (len(alltweets)))
    data = {}
    data['info'] = []
    data['info'].append({
        'name': screen_name
        })
    media_files = set()
    for status in alltweets:
        try:
            media = status.extended_entities.get('media', [])
        except AttributeError:
            media = status.entities.get('media', [])
        # print (media[0])
        if len(media) > 0:
            for i in range(len(media)):
                media_files.add(media[i]['media_url'])
    # mm = 1
    for media_file in media_files:
    #     data['info'].append({
    #         'picture ' + str(m): media_file
    #         })
    #     mm = mm + 1
        print(media_file)
        wget.download(media_file)
    
    os.system("ffmpeg -framerate 1 -pattern_type glob -i '*.jpg'  -c:v libx264 -r 30 -pix_fmt yuv420p out1.mp4")
    os.system("ffmpeg -framerate 1 -pattern_type glob -i '*.png' -c:v libx264 -r 30 -pix_fmt yuv420p  out2.mp4")



    

   # for google vision
    client = vision.ImageAnnotatorClient()
    file = open("label.txt", "w")

    ttt = 1
    point = 0
    numlist = '0123456789'
    OBJ = [pic for pic in listdir(".") if pic.endswith('jpg') or pic.endswith('png')]
    # print(OBJ)
    for i in OBJ:
        file_name = os.path.join(os.path.dirname(__file__), i)

        new_name = numlist[point] + '.jpg'
        data['info'].append({
            'picture ' + str(ttt): i
        })

        os.renames(file_name, new_name)

        print(file_name)
        print("changed down")
        point = point + 1
        # Loads the image into memory
        with io.open(new_name, 'rb') as image_file:
            content = image_file.read()

        image = types.Image(content=content)

        # Performs label detection on the image file
        response = client.label_detection(image=image)
        labels = response.label_annotations

        file.write('Labels for ' + new_name + ':\n')

        label_list = []
        for label in labels:
            label_list.append(label.description)
            file.write(label.description + '\n')

        data['info'].append({
            'description ' + str(ttt): label_list
        })
        ttt += 1
    file.close()
    print(data)
    client = MongoClient()
    db = client.picture.database
    collection = db.picture_collection

    posts = db.posts
    posts.insert_one(data)
Example #16
def get_vision_instance():
    client = vision.ImageAnnotatorClient()
    return client
Example #17
def ProcessMaerskInvoice(ImageList):
    keywordlist = [
        'no.:', 'maersk', 'from:', 'to:', 'description:', 'quantity',
        'itinerary', "size", 'sub.', 'collapsible', 'gross', 'equip', 'pack.',
        'weight', 'volume', 'qty/kind', 'type', 'release', 'vessel', 'voy',
        'etd', 'eta'
    ]
    ############ Preprocess Image ###########

    for image in ImageList:
        currentImage = cv2.imread(image)
        currentImage[currentImage < 10] = 0
        currentImage[(currentImage != 0) & (currentImage != 255)] = 255
        cv2.imwrite(image, currentImage)
    ################ Invoke Vision API for 2nd page ############################
    try:
        for count, image in enumerate(ImageList):
            if count < 2:
                currentfile = ImageList[count]
                with io.open(currentfile, 'rb') as gen_image_file:
                    content = gen_image_file.read()
                client = vision.ImageAnnotatorClient()
                #image = vision.types.Image(content=content)
                image = vision.Image(content=content)
                response = client.text_detection(image=image)
                DictResponse = MessageToDict(response._pb)
                if count == 0:
                    FirstPageDictResponse = DictResponse
                else:
                    SecondPageDictResponse = DictResponse
    except:
        return "invocation error"
    ############# Create Message To Dict For 2nd Page ###############
    SecondPageDictResponse = MessageToDict(response._pb)
    ############# Check for Keywords ##################
    WholeContentDescription = FirstPageDictResponse['textAnnotations'][0][
        'description'].lower() + " " + SecondPageDictResponse[
            'textAnnotations'][0]['description'].lower()
    match = 0
    for keyword in keywordlist:
        if keyword in WholeContentDescription:
            match = match + 1
        else:
            print(keyword)
    if match != len(keywordlist):
        return "missing keywords"
    ############# create Dataframes #########################
    WordsAndCoordinatesPage1 = FirstPageDictResponse['textAnnotations'][1:]
    WordsAndCoordinatesPage2 = SecondPageDictResponse['textAnnotations'][1:]
    WordsAndCoordinates = [WordsAndCoordinatesPage1, WordsAndCoordinatesPage2]
    for num in range(0, len(WordsAndCoordinates)):
        currentWordandCoordinate = WordsAndCoordinates[num]
        word_list = []
        llx_list = []
        lly_list = []
        lrx_list = []
        lry_list = []
        urx_list = []
        ury_list = []
        ulx_list = []
        uly_list = []
        for i in range(0, len(currentWordandCoordinate)):
            word_list.append(currentWordandCoordinate[i]['description'])
            llx_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][0]['x'])
            lly_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][0]['y'])
            lrx_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][1]['x'])
            lry_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][1]['y'])
            urx_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][2]['x'])
            ury_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][2]['y'])
            ulx_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][3]['x'])
            uly_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][3]['y'])
        ##################### Create Dictionary for the lists #####################
        WordsAndCoordinatesDict = {
            "Word": word_list,
            'llx': llx_list,
            'lly': lly_list,
            'lrx': lrx_list,
            'lry': lry_list,
            'urx': urx_list,
            'ury': ury_list,
            'ulx': ulx_list,
            'uly': uly_list
        }
        ####################### Create Dataframe ######################
        if num == 0:
            WordsAndCoordinatesDF_Page1 = pd.DataFrame.from_dict(
                WordsAndCoordinatesDict)
        elif num == 1:
            WordsAndCoordinatesDF_Page2 = pd.DataFrame.from_dict(
                WordsAndCoordinatesDict)
    ###################### Get Values ###########################
    try:
        ############## Booking Number ############################
        BookingNumber_uly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['No.:'
                                                      ])]['uly'].values[0] - 20
        BookingNumber_lly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['No.:'
                                                      ])]['lly'].values[0] + 20
        BookingNumber_urx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['No.:'
                                                      ])]['urx'].values[0]
        MaerskSpot_llx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(
                ['Maersk'])].sort_values(by='lly').head(1)['llx'].values[0]
        BookingNumber = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['uly'] > BookingNumber_uly)
            & (WordsAndCoordinatesDF_Page1['lly'] < BookingNumber_lly) &
            (WordsAndCoordinatesDF_Page1['ulx'] > BookingNumber_urx) &
            (WordsAndCoordinatesDF_Page1['urx'] < MaerskSpot_llx)]
        BookingNumber = " ".join(BookingNumber['Word'].values).strip()
        print(BookingNumber)
        ############## From #############################
        From_uly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['From:'
                                                      ])]['uly'].values[0] - 30
        From_lly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['From:'
                                                      ])]['lly'].values[0] + 30
        From_urx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['From:'
                                                      ])]['urx'].values[0]
        From = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['uly'] > From_uly)
            & (WordsAndCoordinatesDF_Page1['lly'] < From_lly) &
            (WordsAndCoordinatesDF_Page1['ulx'] > From_urx)]
        From = " ".join(From['Word'].values).strip()
        print(From)
        ################# To #############################
        To_uly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['To:'
                                                      ])]['uly'].values[0] - 20
        To_lly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['To:'
                                                      ])]['lly'].values[0] + 20
        To_urx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['To:'])]['urx'].values[0]
        To = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['uly'] > To_uly)
            & (WordsAndCoordinatesDF_Page1['lly'] < To_lly) &
            (WordsAndCoordinatesDF_Page1['ulx'] > To_urx)]
        To = " ".join(To['Word'].values).strip()
        print(To)
        ############# Commodity Description ###################
        Description_uly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Description:'
                                                      ])]['uly'].values[0] - 20
        Description_lly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Description:'
                                                      ])]['lly'].values[0] + 20
        Description_urx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Description:'
                                                      ])]['urx'].values[0]
        Commodity = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['uly'] > Description_uly)
            & (WordsAndCoordinatesDF_Page1['lly'] < Description_lly) &
            (WordsAndCoordinatesDF_Page1['ulx'] > Description_urx)]
        CommodityDescription = " ".join(Commodity['Word'].values).strip()
        ################ Quantity #########################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Size_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Size/Type/Height'
                                                      ])]['llx'].values[0]
        Quantity = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Size_LLX)]
        Quantity = " ".join(Quantity['Word'].values).strip()
        print(Quantity)
        ################ Size ##############################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Quantity_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lrx'].values[0]
        Sub_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Sub.'
                                                      ])]['llx'].values[0]
        Size = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Quantity_LRX) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Sub_LLX)].sort_values(
                by=['llx'])
        Size = " ".join(Size['Word'].values).strip()
        print(Size)
        ################ Sub Equipment #####################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Collapsible_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Collapsible'
                                                      ])]['lrx'].values[0]
        Gross_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Gross'
                                                      ])]['llx'].values[0]
        SubEquipment = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Collapsible_LRX) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Gross_LLX)]
        SubEquipment = " ".join(SubEquipment['Word'].values).strip()
        ############### Gross Weight #####################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Equip_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Equip'
                                                      ])]['lrx'].values[0]
        Pack_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Pack.'
                                                      ])]['llx'].values[0]
        GrossWeight = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Equip_LRX) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Pack_LLX)]
        GrossWeight = " ".join(GrossWeight['Word'].values).strip()
        ############### Pack Quantity ######################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Weight_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Weight'
                                                      ])]['lrx'].values[0] + 40
        Cargo_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Volume'
                                                      ])]['llx'].values[0]
        PackQuantity = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Weight_LRX) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Cargo_LLX)]
        PackQuantity = " ".join(PackQuantity['Word'].values).strip()
        ############## Cargo Volume ##########################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Weight_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Qty/Kind'
                                                      ])]['lrx'].values[0] + 20
        CargoVolume = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Weight_LRX)]
        CargoVolume = " ".join(CargoVolume['Word'].values).strip()
        ######## Load Itinerary Type and Location ###############
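        # NOTE: 'Туре' and 'ЕТА' in the lookups below intentionally contain
        # Cyrillic lookalike characters, matching how the OCR engine
        # transcribed these column headers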
        Type_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Туре'
                                                      ])]['lly'].values[0]
        MaxLowerLimit = Type_lly + 160
        Type_urx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Туре'
                                                      ])]['urx'].values[0]
        MaxURX = Type_urx + 160
        LoadItineraryType = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > Type_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < MaxLowerLimit) &
            (WordsAndCoordinatesDF_Page2['lrx'] < MaxURX)]
        LoadItineraryType = " ".join(LoadItineraryType['Word'].values)
        Location_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Location'
                                                      ])]['lly'].values[0]
        MaxLowerLimit = Location_lly + 160
        Type_urx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Туре'
                                                      ])]['urx'].values[0]
        MaxURX = Type_urx + 160
        Release_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Release'
                                                      ])]['ulx'].values[0]
        LoadItineraryLocation = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > Location_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < MaxLowerLimit) &
            (WordsAndCoordinatesDF_Page2['lrx'] < Release_llx) &
            (WordsAndCoordinatesDF_Page2['llx'] > MaxURX)]
        LoadItineraryLocation = " ".join(LoadItineraryLocation['Word'].values)
        ############### TransportPlanVessel ##########################
        Vessel_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Vessel'
                                                      ])]['lly'].values[0] + 20
        max_lly = Vessel_lly + 80
        Vessel_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Vessel'
                                                      ])]['llx'].values[0]
        Voy_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Voy'])]['llx'].values[0]
        TransportPlanVessel = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > Vessel_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < max_lly) &
            (WordsAndCoordinatesDF_Page2['llx'] >= Vessel_llx) &
            (WordsAndCoordinatesDF_Page2['lrx'] < Voy_llx)]
        TransportPlanVessel = " ".join(TransportPlanVessel['Word'].values)
        ############ TransportVoyNumber #############################
        Voy_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Voy'
                                                      ])]['lly'].values[0] + 20
        max_lly = Voy_lly + 80
        Voy_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Voy'])]['llx'].values[0]
        ETD_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ETD'])]['llx'].values[0]
        TransportVoyNumber = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > Voy_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < max_lly) &
            (WordsAndCoordinatesDF_Page2['llx'] >= Voy_llx) &
            (WordsAndCoordinatesDF_Page2['lrx'] < ETD_llx)]
        TransportVoyNumber = " ".join(TransportVoyNumber['Word'].values)
        ############## TransportPlanETD ###############################
        ETD_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ETD'])]['lly'].values[0]
        max_lly = ETD_lly + 80
        ETD_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ETD'
                                                      ])]['llx'].values[0] - 20
        ETA_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ЕТА'])]['llx'].values[0]
        TransportPlanETD = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > ETD_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < max_lly) &
            (WordsAndCoordinatesDF_Page2['llx'] >= ETD_llx) &
            (WordsAndCoordinatesDF_Page2['lrx'] < ETA_llx)]
        TransportPlanETD = " ".join(TransportPlanETD['Word'].values)
        ################## TransportPlanETA #############################
        ETA_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ЕТА'])]['lly'].values[0]
        max_lly = ETA_lly + 80
        ETA_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ЕТА'
                                                      ])]['llx'].values[0] - 20
        TransportPlanETA = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > ETA_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < max_lly) &
            (WordsAndCoordinatesDF_Page2['llx'] >= ETA_llx)]
        TransportPlanETA = " ".join(TransportPlanETA['Word'].values)
        print(TransportPlanETA)
        return dict(msg="Success",
                    BookingNumber=BookingNumber,
                    From=From,
                    To=To,
                    CommodityDescription=CommodityDescription,
                    Quantity=Quantity,
                    Size=Size,
                    SubEquipment=SubEquipment,
                    GrossWeight=GrossWeight,
                    PackQuantity=PackQuantity,
                    CargoVolume=CargoVolume,
                    LoadItineraryType=LoadItineraryType,
                    LoadItineraryLocation=LoadItineraryLocation,
                    TransportPlanVessel=TransportPlanVessel,
                    TransportVoyNumber=TransportVoyNumber,
                    TransportPlanETD=TransportPlanETD,
                    TransportPlanETA=TransportPlanETA)
    except:
        return "unable to extract data from Google Vision API."
Example #18
import io
import os
import re

#*****************************************************
#               Google Vision API
#*****************************************************

# Imports the Google Cloud client library
from google.cloud import vision
from google.cloud.vision import types
from google.oauth2 import service_account

credentials = service_account.Credentials.from_service_account_file('key.json')
client = vision.ImageAnnotatorClient(
    credentials=credentials)  # [END vision_python_migration_client]

for i in range(1, 10):
    # The name of the image file to annotate (Change the line below 'image_path.jpg' ******)
    file_name = os.path.join(os.path.dirname(__file__),
                             str(i) +
                             '.jpg')  # Your image path from current directory

    with io.open(file_name, 'rb') as image_file:
        content = image_file.read()

    image = vision.types.Image(content=content)

    response = client.text_detection(image=image)
    texts = response.text_annotations
Example #19
def init():
    # [END vision_python_migration_import]
    # Instantiates a g_client
    # [START vision_python_migration_client]
    global g_client
    g_client = vision.ImageAnnotatorClient()
Example #20
def get_labels(Start_Pic_No):
    # Set up access to the Google Vision API.
    # Note: setting the credential via os.system does not carry over to this
    # process, so for now export GOOGLE_APPLICATION_CREDENTIALS in the shell first.
    client = vision.ImageAnnotatorClient()
    i = 1
    print('Getting labels from google and printing labels on it')
    while (1):
        # Check if there are pictures inside the folder
        if os.path.exists('./PICS/' + str(i) + '.jpg') == True:
            file_name = os.path.join(os.path.dirname(__file__),
                                     './PICS/' + str(i) + '.jpg')
            # Read the pictures and get ready to push it to Google
            with io.open(file_name, 'rb') as image_file:
                content = image_file.read()

            image = types.Image(content=content)

            # Get the labels from Google Vision
            try:
                response = client.label_detection(image=image)
                labels = response.label_annotations
            except:
                print(
                    'Google API is not accessible at this time; please check your credentials or try again later.'
                )
                return 0

            # Connect to the database and create a cursor
            cnx = mysql.connector.connect(user=DB_USER,
                                          password=DB_PASSWORD,
                                          database='Pics')
            cursor = cnx.cursor()

            # Setup the cursor
            add_tags = ("INSERT IGNORE INTO tags "
                        "(tags_no, tags) "
                        "VALUES (%s, %s)")
            add_pic_tags = ("INSERT IGNORE INTO pic_tags "
                            "(Pic_No, tags_no) "
                            "VALUES (%s, %s)")

            # Setup PILLOW to put labels into the picture
            # Input the direction of your fonts here
            im = Image.open('./PICS/' + str(i) + '.jpg')
            draw = ImageDraw.Draw(im)
            myfont = ImageFont.truetype(fonts, size=35)
            # As a result, the FONTs.ttf should be copied to the same folders
            fillcolor = 'red'
            # Put label into the picture
            m = 0
            for label in labels:
                m = m + 1
                # Put tags into database, and put the connections between pic and tags into database
                cursor.execute("SELECT MAX(tags_no) FROM tags")
                tags_start_id = cursor.fetchall()[0][0]
                if tags_start_id is None:
                    tags_start_id = 0
                data_tags = (tags_start_id + 1, label.description)
                cursor.execute(add_tags, data_tags)
                cnx.commit()
                cursor.execute("SELECT tags_no FROM tags WHERE tags = '" +
                               label.description + "'")
                tags_no = cursor.fetchall()[0][0]
                data_pic_tags = (Start_Pic_No + i, tags_no)
                cursor.execute(add_pic_tags, data_pic_tags)
                cnx.commit()
                if m <= 3:
                    # Only draw the first three labels onto the picture
                    draw.text((40, 40 * m),
                              label.description,
                              font=myfont,
                              fill=fillcolor)
            im.save('./PICS/' + str(i) + '.jpg', 'JPEG')
            print('Drew labels on picture ' + str(i))
            i = i + 1
        else:
            # No more pictures: report the total, clean up and stop
            print('***************************')
            print(str(i - 1) + ' pictures completed')
            print('***************************')
            cursor.close()
            cnx.close()
            return 1
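For reference, a sketch of the MySQL schema that the INSERT IGNORE statements above appear to assume. Table and column names are taken from the queries; the PRIMARY KEY and UNIQUE constraints are assumptions, added because INSERT IGNORE only deduplicates when such keys exist:

import mysql.connector

# DB_USER and DB_PASSWORD are the same module-level configuration that
# get_labels() relies on; the key constraints are assumed, not documented
cnx = mysql.connector.connect(user=DB_USER, password=DB_PASSWORD,
                              database='Pics')
cursor = cnx.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS tags ("
               " tags_no INT PRIMARY KEY,"
               " tags VARCHAR(255) UNIQUE)")
cursor.execute("CREATE TABLE IF NOT EXISTS pic_tags ("
               " Pic_No INT,"
               " tags_no INT,"
               " PRIMARY KEY (Pic_No, tags_no))")
cnx.commit()
cnx.close()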
Beispiel #21
0
def _get_labels(uri):
    client = vision.ImageAnnotatorClient()
    image = vision.types.Image()
    image.source.image_uri = uri
    labels = client.label_detection(image=image).label_annotations
    return {'labels': [l.description for l in labels]}
Beispiel #22
0
def detect_text(image_url):
    image = build_image_url(image_url)
    client = vision.ImageAnnotatorClient()
    response = client.text_detection(image=image)
    return response.text_annotations[0].description.split('\n')
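build_image_url is not shown in this example. A plausible sketch, assuming it wraps the URL in a vision.Image with a remote source, the same pattern Beispiel #21 uses for URIs:

def build_image_url(image_url):
    # Hypothetical helper: point the Vision API at a remote URL instead of
    # embedding the image bytes; the API then fetches the URL server-side
    image = vision.Image()
    image.source.image_uri = image_url
    return image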
Beispiel #23
0
def __init__(self):
    self.storage_client = storage.Client()
    self.vision_client = vision.ImageAnnotatorClient()
    self.results = {}
Beispiel #24
0
def __init__(self):
    # str.strip('vision_api') would strip characters, not the substring, so
    # trim the trailing directory name explicitly instead
    base_dir = os.getcwd()
    if base_dir.endswith('vision_api'):
        base_dir = base_dir[:-len('vision_api')]
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(
        base_dir, 'guestbook-93c84e7825ff.json')
    self.client = vision.ImageAnnotatorClient()
    self.called = False
Beispiel #25
0
def get_client_lazy(self, **kwargs):
    # Import lazily so the google-cloud-vision dependency is only loaded on
    # first use
    from google.cloud import vision
    return vision.ImageAnnotatorClient(credentials=self.get_credential())
Beispiel #26
0
def __init__(self, service_account_file, cache=True):
    credentials = service_account.Credentials.from_service_account_file(
        service_account_file)
    self.client = vision.ImageAnnotatorClient(credentials=credentials)
    self.cache = cache
Beispiel #27
0
    ToggleButton:
        text: 'Start / Stop'
        on_press: camera.play = not camera.play
        size_hint_y: None
        height: '48dp'
    Button:
        text: 'Capture'
        size_hint_y: None
        height: '48dp'
        on_press: root.capture()
''')

credentials = service_account.Credentials.from_service_account_file(
    "credential-vision.json")
# Instantiates a client
client = vision.ImageAnnotatorClient(credentials=credentials)


def image_process(file_name):
    with io.open(file_name, 'rb') as image_file:
        content = image_file.read()

    image = vision.types.Image(content=content)

    response = client.text_detection(image=image)
    texts = response.text_annotations
    result = []
    title = ""
    max_area = 0
    for text in texts:
        s = text.description
Beispiel #28
0
def loadPdfText(fPath):
    gcs_source_uri = "gs://revaise.appspot.com/" + fPath
    print(gcs_source_uri)
    #gcs_source_uri = "gs://revaise.appspot.com/images/picture-ScienceVideo Game Deep RL0.8575049874802985"
    gcs_destination_uri = "gs://revaise.appspot.com/TextOutput/"
    mime_type = "application/pdf"
    # mime_type = "image/png"
    # How many pages should be grouped into each json output file.
    batch_size = 2

    client = vision.ImageAnnotatorClient()

    feature = vision.Feature(type_=vision.Feature.Type.DOCUMENT_TEXT_DETECTION)
    print(gcs_source_uri)
    gcs_source = vision.GcsSource(uri=gcs_source_uri)
    input_config = vision.InputConfig(gcs_source=gcs_source,
                                      mime_type=mime_type)
    print(input_config)
    gcs_destination = vision.GcsDestination(uri=gcs_destination_uri)
    output_config = vision.OutputConfig(gcs_destination=gcs_destination,
                                        batch_size=batch_size)
    print(output_config)
    import time
    time.sleep(1)
    async_request = vision.AsyncAnnotateFileRequest(
        features=[feature],
        input_config=input_config,
        output_config=output_config)

    operation = client.async_batch_annotate_files(requests=[async_request])

    print('Waiting for the operation to finish.')
    operation.result(timeout=420)

    storage_client = storage.Client()
    match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
    bucket_name = match.group(1)
    prefix = match.group(2)

    bucket = storage_client.get_bucket(bucket_name)
    # List objects with the given prefix.
    blob_list = list(bucket.list_blobs(prefix=prefix))
    print('Output files:')
    for blob in blob_list:
        print(blob.name)

    txt = ""
    for output in blob_list:

        json_string = output.download_as_string()
        if json_string != b'':
            response = json.loads(json_string)

            # The actual response for the first page of the input file.
            for j in response['responses']:
                txt += j['fullTextAnnotation']['text'] + " "
    with open("park.txt", "w+") as f:
        f.write(txt)
    for blob in blob_list:
        blob.delete()
    print(txt)
    return txt
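One caveat with this design: every call writes to, lists, and then deletes the shared TextOutput/ prefix, so two concurrent calls would clobber each other's results. A minimal sketch of a per-call destination, assuming a uuid-based subfolder (not part of the original):

import uuid


def unique_destination_uri(base='gs://revaise.appspot.com/TextOutput/'):
    # Hypothetical: give each loadPdfText() call its own output subfolder so
    # concurrent runs cannot list or delete one another's JSON blobs
    return base + uuid.uuid4().hex + '/'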
Beispiel #29
0
-- In Settings, add the pandas module

import os
import io
from google.cloud import vision
import pandas as pd

os.environ["GOOGLE_APPLICATION_CREDENTIALS"]=r'vision2020-290100-c6b16014803d.json'

client = vision.ImageAnnotatorClient()

file_name = 'setagaya_small.jpeg'
image_path = f'./resources/{file_name}'

with io.open(image_path, 'rb') as image_file:
    content = image_file.read()

# construct an image instance
#image = vision.types.Image(content=content)  # for google-cloud-vision == 1.0.0
image = vision.Image(content=content)
response = client.label_detection(image=image)
labels = response.label_annotations

df = pd.DataFrame(columns=['description', 'score', 'topicality'])

for label in labels:
    df = df.append(
        dict(
            description=label.description,
            score=label.score,
            topicality=label.topicality
        ),
        ignore_index=True
    )
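Note that DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. An equivalent sketch for current pandas collects the rows first and builds the frame in one step:

rows = [
    dict(description=label.description,
         score=label.score,
         topicality=label.topicality)
    for label in labels
]
df = pd.DataFrame(rows, columns=['description', 'score', 'topicality'])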
Beispiel #30
0
from sys import stdin, stdout
from json import loads, dumps
from decouple import config
from datauri import DataURI
from google.cloud import vision
from google.cloud.vision import types
from google.oauth2 import service_account
import os
import urllib.request

# Here GOOGLE_APPLICATION_CREDENTIALS holds the service-account JSON itself,
# not a path to a file, so parse it and build credentials from the dict
api_key = config("GOOGLE_APPLICATION_CREDENTIALS")
api_key = loads(api_key)
api_key = service_account.Credentials.from_service_account_info(api_key)
client = vision.ImageAnnotatorClient(credentials=api_key)


def transcribe(uri):
    # Parses a URI and gets the encoded data string out
    data = DataURI(uri).data
    image = types.Image(content=data)
    response = client.document_text_detection(image)

    if response.text_annotations:
        return response.text_annotations[0].description.replace('\n', ' ')
    else:
        return "No Text"


# Placeholder, to be replaced with a function that extracts metadata from the
# transcript
def nothing(transcript):
    return {"nada": "nothing"}