Ejemplo n.º 1
0
def main(argv):
    """Command-line entry point: load risk data and read the query file.

    Recognised options (both forms are declared in the getopt spec):
        -i / --ifile   path of the input file with the test cases
        -d / --dfile   path of the data file passed to db.initDB

    The first line of the input file is read as the number of test cases;
    that many following lines are evaluated one by one. On an invalid
    option the usage string is printed and the process exits with status 2.
    """
    inputFile = ""
    dataFile = ""
    dbFile = "riskData.db"
    outputFile = "riskFactor_output.txt"
    tableName = "risks"
    try:
        opts, args = getopt.getopt(argv, "i:d:", ["ifile=", "dfile="])
    except getopt.GetoptError:
        print("python bayes.py -i <inputfile> -d <datafile>")
        sys.exit(2)
    for opt, arg in opts:
        # The long forms were declared above but previously never matched,
        # so "--ifile x" was silently ignored.
        if opt in ("-i", "--ifile"):
            inputFile = arg
        elif opt in ("-d", "--dfile"):
            dataFile = arg

    # Nested try/finally so both files and the connection are released even
    # when a later step raises.
    fin = open(inputFile, "r")
    try:
        fout = open(outputFile, "w")
        try:
            graph = initGraph()
            db.initDB(dataFile, dbFile)
            conn = db.getConnection(dbFile)
            try:
                cursor = db.getCursor(conn)
                initCPT(graph, cursor, tableName)

                lines = fin.readlines()
                testCaseNum = int(lines[0])
                for i in range(1, testCaseNum + 1):
                    # NOTE(review): eval() executes arbitrary code from the
                    # input file; only safe for trusted input. Consider
                    # ast.literal_eval if the lines are plain literals.
                    query = eval(lines[i])
            finally:
                db.endConnection(conn)
        finally:
            fout.close()
    finally:
        fin.close()
def insert_actor_to_casts(globalmovieid,name,lastname,username):

    db = getCursor()
    cur = db.cursor()
    #check if is in this movie casts
    sql = "select personid from Persons where name = %s and lastname = %s "
    try:
           cur.execute(sql, [name,lastname])
           result = cur.fetchall()
           for row in result:
               personid = row[0]

               sql = "select count(1) from Casts where imdbid = %s and personid = %s "
               cur.execute(sql, [globalmovieid,personid])
               if cur.fetchone()[0]:
                   print "Actor is in this movie cast"
                   #done nothing to update
                   #if its not insert to casts
               else:
                   sql = "insert into Casts(name,lastname,personid,imdbid,moviename) values(%s,%s,%s,%s,%s)"
                   try:
                      cur.execute(sql,[name,lastname,personid,globalmovieid,username])
                      db.commit()
                   except MySQLdb.Error, e:
                       db.rollback()
                       print e
           cur.close()
Ejemplo n.º 3
0
def getAddressJson(eid):
    """Return the address of entity `eid` as a dict, or None if it has no row.

    The `json` column of the matching `entities` row is parsed and the
    "address_components" of its first element are searched by component
    type. The result has the keys "street", "city", "zip" and "country";
    a component that cannot be found yields an empty string.
    """
    # json with all geocoded data
    with db.getCursor() as cur:
        cur = db.execute(cur, "SELECT json FROM entities WHERE eid=%s", [eid])
        row = cur.fetchone()
        if row is None:
            return None
        j = json.loads(row["json"])

    # TODO: do not duplicate this with code in verejne/
    def getComponent(geocoded, typeName):
        # Long name of the first component tagged with typeName, else "".
        try:
            for component in geocoded[0]["address_components"]:
                if typeName in component["types"]:
                    return component["long_name"]
        except (KeyError, IndexError, TypeError):
            # Missing or unexpectedly shaped geocoding data counts as
            # "component absent"; other errors are no longer swallowed.
            return ""
        return ""

    # types description: https://developers.google.com/maps/documentation/geocoding/intro#Types
    # street / city can be defined in multiple ways
    address = {
        "street": (getComponent(j, "street_address") +
                   getComponent(j, "route") + getComponent(j, "intersection") +
                   " " + getComponent(j, "street_number")),
        "city": getComponent(j, "locality"),
        "zip": getComponent(j, "postal_code"),
        "country": getComponent(j, "country"),
    }
    return address
Ejemplo n.º 4
0
def scrap_user_reviews(movieid,review_url):
    """Scrape the reviews found at review_url and store them for movieid.

    The page is fetched and review bodies and titles are extracted via
    XPath. Rows are only written when both lists are non-empty and have the
    same length; each (title, review) pair is inserted into `reviews` and
    committed on its own, with a rollback on a MySQLdb error.
    """
    response = requests.get(review_url, headers=headers)
    tree = html.fromstring(response.content)
    tree.make_links_absolute(review_url)

    reviews = tree.xpath('//div[@class="yn"]/preceding-sibling::p/text()[1]')
    review_titles = tree.xpath('//h2/text()')

    print(len(reviews))
    print(len(review_titles))

    # Nothing is stored unless titles and bodies pair up one to one.
    if not reviews or not review_titles:
        return
    if len(reviews) != len(review_titles):
        return

    print(movieid)
    print('insert review...')
    print(review_url)

    conn = getCursor()
    insert_sql = "insert into reviews(movieid,title,review) values(%s,%s,%s)"
    for raw_title, raw_review in zip(review_titles, reviews):
        title = raw_title.strip()
        review = raw_review.strip()
        cur = conn.cursor()
        try:
            cur.execute(insert_sql, [movieid, title, review])
            conn.commit()
        except MySQLdb.Error as e:
            conn.rollback()
            print(e)
    conn.close()
def actorphoto(personid,link):
    """Scrape the image URLs on the page `link` and attach them to personid.

    Each URL is inserted into Images; every imageid then found for that URL
    is linked to personid in PersonImages and committed. A MySQLdb error is
    printed and rolled back. The loop stops once the zero-based position
    equals photos_url_per_actor.
    """
    print("photo scrap...")
    print(link)

    response = requests.get(link, headers=headers)
    tree = html.fromstring(response.content)
    tree.make_links_absolute(link)

    imglinks = tree.xpath('//div[@id="media_index_thumbnail_grid"]//a/img/@src')
    if not imglinks:
        return

    conn = getCursor()
    cur = conn.cursor()
    for position, img in enumerate(imglinks):
        try:
            cur.execute("insert into Images(imageurl) values(%s)", [img])
            cur.execute("select imageid from Images where imageurl = %s", [img])
            for row in cur.fetchall():
                imageid = row[0]
                cur.execute(
                    "insert into PersonImages(personid,imageid) values(%s,%s)",
                    [personid, imageid])
                conn.commit()
        except MySQLdb.Error as e:
            print(e)
            conn.rollback()
        # The limit is checked after the insert for this position.
        if position == photos_url_per_actor:
            print('max photos ' + repr(position))
            break
    conn.commit()
    conn.close()
Ejemplo n.º 6
0
 def createCall(self, id):
     """Validate the call form and dispatch a new call to officer `id`.

     Shows an error popup and returns without dispatching when a text field
     is empty, when the zip code is not a 5-character number, or when the
     officer's status in the database is 0. Otherwise the call is created,
     sent to the officer and the form is cleared.
     """

     def showError(message):
         # Publish the message on the screen object and open the popup.
         scrn.error = message
         popupError().open()

     # list [type, addr, city, zip, place, phone, desc, officer_ID]
     callList = [self.ids.callType.text, self.ids.streetAddr.text, self.ids.city.text, self.ids.zip.text,
                 self.ids.place.text, self.ids.phone.text, self.ids.description.text, id]
     for item in callList:
         # Compare by value: `is ""` tests object identity, which is an
         # interpreter detail (and fails for unicode text). The former
         # `or 0` operand was always false and has been dropped.
         if item == "":
             showError("Empty Field: Please fill in all text boxes.")
             return

     zipCode = callList[3]
     try:
         int(zipCode)
     except (TypeError, ValueError):
         # Contains letters or other non-numeric characters.
         showError("Zip Code Format: Ensure Zip Code is a number.")
         return
     if len(zipCode) != 5:
         showError("Zip Code Format: Ensure Zip Code is a 5-digit number.")
         return

     # Kept from the original, which looked the officer up here without
     # using the result.
     self.ids.ob.getOfficer(id)

     cursor = db.getCursor()
     try:
         cursor.execute("select status from officer where officer_id = %s", id)
         for row in cursor:
             if row["status"] == 0:
                 showError("Officer is not available.")
                 return
     finally:
         cursor.close()

     dispatchCall(callList)
     self.ids.ob.getOfficer(id).sendCall()
     self.clearFields()
Ejemplo n.º 7
0
def checkOffline():
    """Remove officers that are no longer on duty from the online list.

    For every officer in globals.onlineOfficers the `on_duty` column is read
    from the database; when it is 0 the officer is removed from the list and
    from the officer box of globals.screens[2].
    """
    cursor = db.getCursor()
    try:
        # Iterate over a snapshot: removing from the list that is being
        # iterated makes the loop skip the element after each removal.
        for off in list(globals.onlineOfficers):
            cursor.execute("select on_duty from officer where officer_id = %s",
                           off.id)
            # Value comparison; `is 0` relied on small-integer caching and
            # fails for other numeric types a driver may return.
            if any(row['on_duty'] == 0 for row in cursor.fetchall()):
                globals.onlineOfficers.remove(off)
                globals.screens[2].ids.ob.deleteOfficer(off.id)
    finally:
        cursor.close()
def insert_series(movieid,imdbid,poster):
    """Insert one (movieid, imdbid, poster) row into the Series table.

    The insert is committed on success; a MySQLdb error is printed and
    otherwise ignored.
    """
    conn = getCursor()
    cursor = conn.cursor()
    try:
        cursor.execute(
            "insert into Series(movieid,imdbid,poster) values(%s,%s,%s)",
            [movieid, imdbid, poster])
        conn.commit()
    except MySQLdb.Error as e:
        print(e)
def insert_season(globalmovieid,imdb,seasonlink,season):
    """Insert one (movieid, season, link) row into the Season table.

    `imdb` is accepted but not used by the insert. The insert is committed
    on success; a MySQLdb error is printed and otherwise ignored.
    """
    conn = getCursor()
    cursor = conn.cursor()
    try:
        cursor.execute(
            "insert into Season(movieid,season,link) values(%s,%s,%s)",
            [globalmovieid, season, seasonlink])
        conn.commit()
    except MySQLdb.Error as e:
        print(e)
Ejemplo n.º 10
0
def populateRelated(relationship_table, colA, colB, tableA, tableB):
    """Copy the pairs of relationship_table into the `related` table.

    colA/colB values of every row are translated through the mappings of
    tableA/tableB (see getMapping) and then to eids via `entities`. Rows
    with a missing value or an unmapped id are skipped; ids without an eid
    are logged. A commit is issued every 50 rows and once at the end.
    """
    print("populateRelated %s" % (relationship_table,))
    cur = db.getCursor()
    db.execute(cur, "SELECT id, eid FROM entities")
    id_to_eid = dict((entity["id"], entity["eid"]) for entity in cur.fetchall())

    print("loading mapping")
    mapA = getMapping(tableA)
    mapB = getMapping(tableB)
    print("mapping loaded")

    sql = "SELECT " + colA + ", " + colB + " FROM " + relationship_table
    cur = db.getCursor()
    db.execute(cur, sql)
    for index, row in enumerate(cur.fetchall(), 1):
        if index % 50 == 0:
            print("index %d" % index)
            db.db.commit()

        valA = row.get(colA, None)
        valB = row.get(colB, None)
        if valA is None or valB is None:
            continue
        if valA not in mapA or valB not in mapB:
            continue

        newA = mapA[valA]
        newB = mapB[valB]
        if newA not in id_to_eid:
            logging.info("Missing " + str(newA) + " in id_to_eid")
            continue
        if newB not in id_to_eid:
            logging.info("Missing " + str(newB) + " in id_to_eid")
            continue

        db.insertDictionary(
            "related", {
                "id1": newA,
                "eid1": id_to_eid[newA],
                "id2": newB,
                "eid2": id_to_eid[newB]
            })
    db.db.commit()
Ejemplo n.º 11
0
def insert_episode(movieid,imdb,season,title,image,plot,episode):
    """Insert one episode row into the Episodes table.

    title and plot are passed through remove_all_special_chars first. The
    insert is committed on success; a MySQLdb error is printed and
    otherwise ignored.
    """
    clean_title = remove_all_special_chars(title)
    clean_plot = remove_all_special_chars(plot)
    conn = getCursor()
    cursor = conn.cursor()
    try:
        cursor.execute(
            "insert into Episodes(movieid,title,plot,poster,season,episode,imdb) values(%s,%s,%s,%s,%s,%s,%s)",
            [movieid, clean_title, clean_plot, image, season, episode, imdb])
        conn.commit()
    except MySQLdb.Error as e:
        print(e)
Ejemplo n.º 12
0
def processRelated():
    """Empty the `related` table and refill it from the relationship tables.

    After the delete is committed, populateRelated is called once per
    (relationship table, column A, column B, mapping table A, mapping
    table B) entry, in the order listed.
    """
    logging.info("processRelated")
    cur = db.getCursor()
    db.execute(cur, "DELETE FROM related")
    db.db.commit()

    sources = (
        ("people_esd", "organization_id", "record_id",
         "orsresd_geocoded_", "people_esd_geocoded_"),
        ("orsr_relationships", "id_osoby", "id_firmy",
         "orsr_persons_geocoded_", "firmy_unified2_geocoded_"),
        ("relation", "_record_id", "_record_id",
         "relation_from_geocoded_", "relation_to_geocoded_"),
    )
    for source in sources:
        populateRelated(*source)
Ejemplo n.º 13
0
def IcoToLatLngMap():
    """Return a dict mapping integer ico to a (lat, lng) tuple.

    The three data tables are read in order, each joined with `entities` on
    id; an ico seen in a later table overwrites the earlier entry.
    """
    query_template = ("SELECT ico, lat, lng FROM %(table)s"
                      " JOIN entities on entities.id = %(table)s.id"
                      " WHERE ico IS NOT NULL")
    output_map = {}
    for table in ("orsresd_data", "firmy_data", "new_orsr_data"):
        with db.getCursor() as cur:
            db.execute(cur, query_template % {"table": table})
            for row in cur:
                output_map[int(row["ico"])] = (row["lat"], row["lng"])
    return output_map
Ejemplo n.º 14
0
def getCallID():
    """Return the next call id: the largest call_id in `calls` plus one.

    Returns 1 when the table has no rows (or no positive ids).
    """
    cursor = db.getCursor()  # Database cursor object
    try:
        cursor.execute("select call_id from calls;")
        # An empty result leaves top at 0, so the former `rowcount is 0`
        # special case (an identity comparison that also returned without
        # closing the cursor) is not needed.
        top = 0
        for row in cursor:
            if row["call_id"] > top:
                top = row["call_id"]
    finally:
        cursor.close()
    return top + 1
Ejemplo n.º 15
0
    def loginButton(self,):
        """Check the entered credentials and switch to the user's screen.

        All officer rows are scanned for the entered username. On a password
        mismatch, or when no row has that username, a message is written to
        the status label. On success the user's last name is stored in
        globals.info[0] and a dispatcher, admin or officer screen is created
        in globals.screens[2] and shown.
        """
        password = str(self.ids.password.text)  # Password the user entered
        username = str(self.ids.username.text)  # Username the user entered
        cursor = db.getCursor()

        cursor.execute("select * from officer;")
        for record in cursor:
            # Skip rows belonging to other users.
            if record["username"] != str(username):
                continue
            if record["pass"] != str(password):
                self.ids.status.text = "Incorrect Password"
                return

            globals.info[0] = record["last_name"]

            # Dispatcher takes precedence over admin; everyone else is an
            # officer, whose id is remembered in globals.info[1].
            if record["dispatch"] == 1:
                screenClass = DispatchScreen
            elif record["username"] == 'admin':
                screenClass = AdminScreen
            else:
                globals.info[1] = record["officer_id"]
                screenClass = OfficerScreen

            # Show the screen in slot 0 first, build the user's screen in
            # slot 2, clear the login form, then switch to the new screen.
            scrn.switch_to(globals.screens[0])
            globals.screens[2] = screenClass()
            self.clear()
            scrn.switch_to(globals.screens[2])
            return

        # No row matched the username.
        self.ids.status.text = "Username doesn't exist!"
        return
Ejemplo n.º 16
0
def seriesscrap(imdb,globalmovieid,title,datelist,genre,content_rating,ratings,rating_value,plot,link,poster):
    """Scrape a TV series page and store the series and its season links.

    When a Series row already exists for globalmovieid only an empty line is
    printed. Otherwise the page at `link` is fetched; if it lists season
    links, the series is inserted into Movie and Series and one Season row
    is inserted per season with a constructed episodes URL.
    """
    print("Tv series scrap...")

    # check if series exists
    conn = getCursor()
    cur = conn.cursor()
    cur.execute("select count(1) from Series where movieid = %s", [globalmovieid])
    if cur.fetchone()[0]:
        # Existing series: checking for new seasons/episodes is not done here.
        print('')
        return

    response = requests.get(link, headers=headers)
    tree = html.fromstring(response.content)
    tree.make_links_absolute(link)

    season_anchor = '//div[@class="seasons-and-year-nav"]//br[@class="clear"]/following-sibling::div[1]/a'
    seasonlinks = tree.xpath(season_anchor + '/@href')
    seasonsnum = tree.xpath(season_anchor + '/text()')

    print(len(seasonlinks))
    print(len(seasonsnum))
    if not seasonlinks:
        return

    # first insert into Movie, then into Series
    insert_series_into_movie(imdb,globalmovieid,title,genre,content_rating,ratings,rating_value,plot,poster)
    insert_series(globalmovieid,imdb,poster)

    base_link = "http://www.imdb.com/title/tt" + str(imdb) + "/"

    def season_url(number):
        # Episodes-page URL for the given 1-based season number.
        return base_link + "episodes?season=" + str(number) + "&ref_=tt_eps_sn_" + str(number)

    if "See all" in ' '.join(seasonsnum):
        # The first listed label is taken as the number of seasons.
        print("See all")
        maxseason = int(seasonsnum[0])
        for number in xrange(1, maxseason + 1):
            url = season_url(number)
            print(url)
            insert_season(globalmovieid, imdb, url, number)
    else:
        print('Not See all')
        for i in xrange(len(seasonlinks)):
            print("%s %s" % (seasonsnum[i], seasonlinks[i]))
            insert_season(globalmovieid, imdb, season_url(i + 1).strip(), i + 1)
Ejemplo n.º 17
0
def getColumnForTableIco(table, column, ico):
    """Return `column` of the first row of `table` matching `ico`, else None.

    `table` is joined with `entities` on id. None is returned when no row
    matches or when the lookup raises an ordinary exception.

    Note: `table` and `column` are concatenated into the SQL text, so they
    must come from trusted code; only `ico` is passed as a bound parameter.
    """
    sql = "SELECT " + column + " FROM " + table + \
          " JOIN entities ON entities.id = " + table + ".id" + \
          " WHERE ico = %s" + \
          " LIMIT 1"
    with db.getCursor() as cur:
        try:
            cur = db.execute(cur, sql, [ico])
            row = cur.fetchone()
            if row is None:
                return None
            return row[column]
        except Exception:
            # Best-effort lookup. Unlike a bare `except:`, this lets
            # KeyboardInterrupt and SystemExit propagate.
            return None
Ejemplo n.º 18
0
def run():
    """Compute and print recommendations for ids 3 and 4.

    The first computation and its printing are timed and the two durations
    are reported in milliseconds.
    """
    database = db.connect()
    cursor = db.getCursor(database)

    started = time.time()
    results = recommendations.getRecommendations(cursor, 3)
    computed = time.time()
    prettyPrint(cursor, 3, results)
    printed = time.time()

    compute_ms = (computed - started) * 1000.0
    print_ms = (printed - computed) * 1000.0
    print("REQUIRED TIME FOR RECOMENDATIONS: %0.3f ms, for querying and printing: %0.3f ms" % (
        compute_ms, print_ms))

    results = recommendations.getRecommendations(cursor, 4)
    prettyPrint(cursor, 4, results)
    # NOTE(review): the cursor (not `database`) is handed to db.disconnect,
    # as in the original — confirm that is what disconnect expects.
    db.disconnect(cursor)
Ejemplo n.º 19
0
    def buildArray(self):
        """Append one DCADOfficerInfo widget per non-dispatch officer.

        Rows of `officer` with dispatch = FALSE and officer_id > 1 are read;
        each widget gets the officer's last name, badge number, padding,
        width and id, and its onScene button is set to "down".
        """
        cursor = db.getCursor()
        cursor.execute("select * from officer where dispatch = FALSE and officer_id > 1")
        for record in cursor:
            entry = DCADOfficerInfo()
            entry.ids.name.text = str(record["last_name"])
            entry.ids.badgeNum.text = str(record["officer_id"])
            entry.padding = [0, self.height / 10, 5, 0]
            entry.width = self.width
            entry.oid = int(record["officer_id"])
            entry.ids.onScene.state = "down"
            self.allOfficers.append(entry)

        cursor.close()
def getPersonsId(name,lastname):
    """Return the personid of the first Persons row matching name/lastname.

    Returns None when no row matches or when a MySQLdb error occurs (the
    error is printed).
    """
    conn = getCursor()
    cursor = conn.cursor()

    lookup = "select personid from Persons where name = %s and lastname = %s "
    try:
        cursor.execute(lookup, [name, lastname])
        # Only the first matching row is used.
        for record in cursor.fetchall():
            return record[0]
    except MySQLdb.Error as e:
        print(e)
Ejemplo n.º 21
0
def checkState():
    """Sync the status buttons with the current officer's database row.

    Reads the officer whose id is in globals.info[1]. With status 0,
    flipState('tenSeven', scene) is called, where scene is 'twentyThreeD'
    for on_scene 1 and 'twentyThreeN' for on_scene 0 (None otherwise). With
    status 1, flipState('tenEight', None) is called.
    """
    cursor = db.getCursor()
    cursor.execute("select * from officer where officer_id = %s",
                   globals.info[1])
    for row in cursor:
        # Values are compared with ==; `is 0` / `is 1` tested object
        # identity, which only works by accident of small-integer caching.
        scene = None
        if row['on_scene'] == 1:
            scene = 'twentyThreeD'
        elif row['on_scene'] == 0:
            scene = 'twentyThreeN'

        if row['status'] == 0:
            flipState('tenSeven', scene)
        elif row['status'] == 1:
            # The on-scene button is not passed on for this status.
            flipState('tenEight', None)
Ejemplo n.º 22
0
def moviedb(i,globalmovieid,title,datelist,genre,content_rating,ratings,rating_value,plot,link,poster):
    MySQLdb.escape_string("'")

    plot = remove_all_special_chars(plot)

    #db utils
    db = getCursor()
    cur = db.cursor()

    #check if movie already in database
    cur.execute("select count(1) from Movie where imdbid = %s or movieid = %s ", [i,globalmovieid])

    if cur.fetchone()[0]:
        print 'Movie exits'
        #if record exists do nothing ,its movie,movie never changes
    else:
        print 'Movie not exists'
        print 'insert..'

        if len(datelist) < 4:
            datelist = ["1","January","1971"]

        #if movie is less than 2 months old
        #insert into movie
        if(is_date_older_2months(datelist) == False):

            print globalmovieid,i,title
            print ' '.join(datelist)

            if not rating_value:
                rating_value = 5

            if not content_rating:
                content_rating = "R"


            sql = "insert into Movie(movieid,imdbid,title,plot,altplot,date,year,month,day,genre,ratings,ratingvalue,contentrating,poster) " \
                  "values('%s','%s','%s','%s','%s','%s','%d','%s','%s','%s','%s','%s','%s','%s') " % \
                  (globalmovieid,i,title.strip(),plot.strip()," ",' '.join(datelist),int(datelist[2]),month_to_int(datelist[1]),0,genre,int(ratings),float(rating_value),content_rating,poster)

            try:
                cur.execute(sql)
                db.commit()
            except MySQLdb.Error, e:
                db.rollback()
                print e

        else:
Ejemplo n.º 23
0
 def updateState(self):
     """Refresh the button states of every officer widget from the database.

     For each widget in self.officers the officer row with the widget's
     badge number is read and the tenSeven, tenEight and onScene button
     states are set from its `status` and `on_scene` columns.
     """
     for officer in self.officers:
         cursor = db.getCursor()
         try:
             cursor.execute('select * from officer where officer_id =%s',  int(officer.ids.badgeNum.text))
             for row in cursor:
                 # Compare by value: `is 0` / `is 1` tested object identity
                 # and depends on small-integer caching.
                 if row['status'] == 0:
                     officer.ids.tenSeven.state = 'normal'
                     officer.ids.tenEight.state = 'down'
                     officer.ids.onScene.state = 'normal'
                 if row['status'] == 1:
                     officer.ids.tenSeven.state = 'down'
                     officer.ids.tenEight.state = 'normal'
                 # Applied last, so on_scene overrides the onScene state
                 # set in the status branch above.
                 if row['on_scene'] == 1:
                     officer.ids.onScene.state = 'normal'
                 if row['on_scene'] == 0:
                     officer.ids.onScene.state = 'down'
         finally:
             # One cursor is opened per officer; release it each time.
             cursor.close()
Ejemplo n.º 24
0
def getMapping(table_name):
    """Return a dict mapping orig_id to new_id for all rows of table_name.

    The table is read in LIMIT/OFFSET batches until a batch comes back empty.
    """
    # NOTE(review): the query has no ORDER BY, so batch boundaries depend on
    # whatever row order the database happens to return — confirm it is stable.
    batch_size = 33333
    result = {}
    cur = db.getCursor()
    from_index = 0
    while True:
        sql = ("SELECT new_id, orig_id FROM " + table_name +
               " LIMIT " + str(batch_size) + " OFFSET " + str(from_index))
        db.execute(cur, sql)
        rows = cur.fetchall()
        if not rows:
            break
        for row in rows:
            result[row["orig_id"]] = row["new_id"]
        from_index += batch_size
    return result
Ejemplo n.º 25
0
def insert_series_into_movie(imdb,globalmovieid,title,genre,content_rating,ratings,rating_value,plot,poster):
    """Insert a series as a row of the Movie table.

    A falsy rating_value defaults to 5 and a falsy content_rating to "R".
    title and plot are passed through remove_all_special_chars and stripped;
    altplot is stored as a single space. The insert is committed on success;
    on a MySQLdb error it is rolled back and the error is printed.
    """
    conn = getCursor()
    cur = conn.cursor()
    if not rating_value:
        rating_value = 5
    if not content_rating:
        content_rating = "R"
    title = remove_all_special_chars(title)
    plot = remove_all_special_chars(plot)

    # Values are bound as parameters, like the other insert helpers, rather
    # than interpolated into the SQL text: scraped text containing a quote
    # would otherwise break the statement or allow SQL injection.
    sql = "insert into Movie(movieid,imdbid,title,plot,altplot,genre,ratings,ratingvalue,contentrating,poster) " \
          "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    params = [globalmovieid, imdb, title.strip(), plot.strip(), " ", genre,
              int(ratings), float(rating_value), content_rating, poster]
    try:
        cur.execute(sql, params)
        conn.commit()
    except MySQLdb.Error as e:
        conn.rollback()
        print(e)
Ejemplo n.º 26
0
 def buildPrevCalls(self):
     """Add every call stored for the current officer via self.addCall.

     The officer id is taken from globals.info[1]; each call is added with
     its start time, street address and call id.
     """
     cursor = db.getCursor()
     cursor.execute('select * from calls where officer_id = %s', globals.info[1])
     for record in cursor:
         started = record['time_start']
         address = record['street_address']
         number = record['call_id']
         self.addCall(started, address, number)
Ejemplo n.º 27
0
import logging
from optparse import OptionParser
import os
import sqlite3
import urllib, json
# Dirty hack to import from the parent directory. TODO: packaging...
from os import sys, path
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
import db
from db import parser

#parser.add_argument("--download_datanest", action="store_true")
# Parse the command-line options shared through db.parser.
options = parser.parse_args()

db.connect(False)
cur = db.getCursor()

# Companies that have a 2015 revenue figure.
sql = (
    "SELECT ico, trzby2015, zisk2015, datum_vzniku "
    "FROM company_stats "
    "WHERE trzby2015 IS NOT NULL"
)

cur = db.execute(cur, sql)

# Index the rows by their integer ico; a later row replaces an earlier one
# with the same ico.
data = {}
for row in cur.fetchall():
    ico = int(row["ico"])
    data[ico] = row

print("Number of companies with data %d" % len(data))
Ejemplo n.º 28
0
def checkOnline():
    """Poll the on-duty officers and keep the dispatcher screen in sync.

    Runs until globals.dispRunning becomes false, polling roughly every
    quarter second. Each round first drops officers that went off duty
    (checkOffline), then for every on-duty row either updates the matching
    entry of globals.onlineOfficers and its buttons, or creates a new
    officer and adds it to the screen and to the list.
    """

    def registerOfficer(row):
        # Build an officer from a database row, add it to the dispatcher
        # screen and remember it as online.
        info = [
            int(row["officer_id"]),
            str(row["last_name"]),
            bool(row["status"]),
            bool(row["on_duty"])
        ]
        curOff = classes.officer(info)
        globals.screens[2].ids.ob.putOfficerIn(curOff.id)
        globals.onlineOfficers.append(curOff)

    time.sleep(.25)
    print("starting thread")
    # Run while dispatcher screen is running
    while globals.dispRunning:
        cursor = db.getCursor()
        try:
            cursor.execute("select * from officer where  on_duty = True")
            # Check to see if any officers previously online have gone offline
            checkOffline()
            for row in cursor:
                known = False
                for off in globals.onlineOfficers:
                    if row["officer_id"] != int(off.id):
                        continue
                    # Officer was already online: refresh state and buttons.
                    known = True
                    off.active = row["status"]
                    off.onScene = row["on_scene"]
                    widget = globals.screens[2].ids.ob.getOfficer(
                        row["officer_id"])
                    widget.change23Button(row["on_scene"])
                    widget.changeStatusButton(row["status"])
                    # Truthiness instead of `is True`: off.active holds the
                    # raw column value, and an integer 1 is never identical
                    # to True, so this branch could not run before.
                    if off.active:
                        db.setCallInactive(db.getCurCall(row["officer_id"]))
                        widget.change23Button(False)
                        db.updateOnScene(row["officer_id"], False)
                # Not in the list (this includes the empty-list case): a new
                # officer came online.
                if not known:
                    registerOfficer(row)
        finally:
            cursor.close()
        time.sleep(.25)
    print("stopping thread")
Ejemplo n.º 29
0
 def changeText(self):
     """Show the stored report of call self.callid in the reportText widget.

     When several rows are returned, the last one wins.
     """
     cursor = db.getCursor()
     cursor.execute('select report_file from calls where call_id = %s', self.callid)
     for record in cursor:
         report = record['report_file']
         self.ids.reportText.text = report
Ejemplo n.º 30
0
import datetime
import db
import parsec

dbc = db.dbConnect('localhost', 'root', 'atlas', 'parsec')
cursor = db.getCursor(dbc)

db.createTable(cursor, 'reports')
db.createTable(cursor, 'companies')

print ''
print ''
print '********** PARSEC **********'
print '****************************'
print ''

valid = 0
total = 0

info = {}
info['start'] = datetime.datetime.now()

completed = db.countReports(dbc, cursor)
print str(completed['count']) + ' Existing Reports'
print ''

for year in range(2016, 1994, -1):
    for qtr in range(4, 0, -1):
        index = parsec.getIndex(year, qtr)

        for report in index:
Ejemplo n.º 31
0
def scrapActor(i,globalmovieid,actor,link,username):
    print 'actor scraping start...'
    print actor,link
    db = getCursor()
    cur = db.cursor()
    #first check if actor already in database
    actor = remove_all_special_chars(actor)
    actor = actor.split(" ")
    username = remove_all_special_chars(username)
    name = ""
    lastname = ""
    if len(actor) == 1:
       name = actor[0]
    else:
        name = actor[0]
        lastname = actor[1]

    cur.execute("select count(1) from Persons where name = %s and lastname = %s ", [name,lastname])
    if cur.fetchone()[0]:
        print 'Actor exits'
        #check if is in this movie casts
        insert_actor_to_casts(globalmovieid,name,lastname,username)
    else:
        print 'new Actor'
        #1first Persons
        #bio info
        #Born date
        page = requests.get(link, headers=headers);
        tree = html.fromstring(page.content);
        tree.make_links_absolute(link)

        shortbio = tree.xpath('//div[@class="name-trivia-bio-text"]/div[@class="inline"]/text()')
        fullbiolink = tree.xpath('//div[@class="name-trivia-bio-text"]/div[@class="inline"]/span/a/@href')
        borndate = tree.xpath('//div[@id="name-born-info"]/time/@datetime')
        profilepic = tree.xpath('//td[@id="img_primary"]/div[@class="image"]/a/img/@src')
        photosurl = tree.xpath('//div[@class="see-more"]/a[1]/@href')

        #insert into persons
        if shortbio:
            if profilepic:
                if not borndate:
                    borndate = [" "]

                db = getCursor()
                cur = db.cursor()

                sql = "insert into Persons(name,lastname,birth) values(%s,%s,%s)"
                try:
                    cur.execute(sql,[name,lastname,' '.join(borndate)])
                    db.commit()
                except MySQLdb.Error, e:
                    print e
                    db.rollback()
                    actorinfo = []
                    actorinfo.append("Actor")
                    actorinfo.append(name)
                    actorinfo.append(lastname)
                    sql = "insert into Errors(movieid,info,level) values('%s','%s','%s')" % \
                    (globalmovieid,i,' '.join(actorinfo))
                    try:
                      cur.execute(sql)
                      db.commit()
                    except MySQLdb.Error, e:
                      print "error",e
                      db.rollback()
                    db.close()


                #2then casts
                insert_actor_to_casts(globalmovieid,name,lastname,username)
                #3then personsInfo
                if fullbiolink:
                    page = requests.get(fullbiolink[0], headers=headers);
                    tree = html.fromstring(page.content);
                    tree.make_links_absolute(fullbiolink[0])

                    minibio = tree.xpath('//div[@id="bio_content"]/div[@class="soda odd"]/p[1]/text()')
                    minibio = remove_all_special_chars(''.join(minibio))
                    #if any exists from below
                    #mini bio
                    #trivia
                    #Personal Quotes
                    #insert extra info link bio trivia personal quotes to personinfo
                    personid = getPersonsId(name,lastname)
                    db = getCursor()
                    cur = db.cursor()
                    sql = "insert into PersonInfo(bio,personid,profilepic) values(%s,%s,%s)"
                    try:
                        cur.execute(sql,[minibio.strip(),personid,''.join(profilepic).strip()])
                        db.commit()
                    except MySQLdb.Error, e:
                        print e
                    #4Last photos from photo page
                    if photosurl:
                        actorphoto(personid,''.join(photosurl[0]))
                else:
Ejemplo n.º 32
0
        cursor.connection.rollback()
        print "ERROR"
        print e
        print "Error insert >>%s<<" % user

# Maps a sector label to a short category code.
sectorMapping = {
    'Academic': 'edu',
    'Government': 'gov',
    'Media': 'other',
    'Other': 'other',
    'Independent': 'other',
    'Multilateral Agency': 'gov',
    'Private Sector': 'com',
    'NGO': 'org',
    'Student': 'edu',
}


def mapSector(s):
    """Return the category code for sector label s.

    Unknown labels are reported on stdout and mapped to 'other'.
    """
    try:
        return sectorMapping[s]
    except KeyError:
        print("no mapping for %s" % s)
        return 'other'


# Module-level setup for the import script: database cursor, geocoder and
# run-time switches.
cursor = db.getCursor()
# Alternative geocoder, currently disabled.
#geolocator = Nominatim()
# NOTE(review): the API key is hard-coded in source. It should presumably be
# loaded from configuration or the environment, and this key rotated — confirm.
geolocator = GoogleV3(api_key='AIzaSyCjJduX95CXz3LtiX5sfw19GhqcicVYs6c', timeout=5)

# Geocode a sample query and print the result.
print geolocator.geocode('United Kingdom')
print 'done'
# NOTE(review): this opens an interactive console; the statements below only
# run after that console exits. Looks like a debugging leftover — confirm.
import code
code.interact(local=locals())

# Switches and limit, presumably read by the CSV-processing code that
# follows (not fully visible here) — TODO confirm.
GEOCODE = True
DRYRUN = False
COUNT = 2000

with open('ottawa-list-latest.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)