예제 #1
0
파일: dbmngr.py 프로젝트: nanflasted/C-SPAN
def queryEntry(conn, tar, tab, cond=None, agg=None, aggcond=None, params=None):
    '''Run a SELECT statement assembled from the given clause fragments.

    The fragments are spliced into the SQL text verbatim, so they must come
    from trusted code. Values that originate outside the program should be
    written as "?" placeholders inside cond/aggcond and passed via params.

    Args:
        conn:   The connection to the database
        tar:    Target list; iterable of column expressions to select
        tab:    "From" conditions; iterable of table names
        cond:   "Where" conditions as one SQL string, or None
        agg:    Aggregation conditions; iterable of "group by" columns, or None
        aggcond:"Having" conditions as one SQL string, or None
        params: Optional sequence of values bound to "?" placeholders
    Returns:
        result: Cursor pointing to the query results, None if the query failed
    '''
    execstr = ""
    try:
        clauses = ["select " + ",".join(tar),
                   "from (" + ",".join(tab) + ")"]
        if cond is not None:
            clauses.append("where (" + cond + ")")
        if agg is not None:
            # Deliberately not parenthesised: "group by (a,b)" is read by
            # SQLite as a row value, which it does not accept in this clause.
            clauses.append("group by " + ",".join(agg))
        if aggcond is not None:
            clauses.append("having (" + aggcond + ")")
        execstr = " ".join(clauses)
        c = conn.cursor()
        c.execute(execstr, () if params is None else params)
        return c
    except Exception:
        csplog.logexc(sys.exc_info())
        # Echo the statement so a malformed fragment is easy to spot.
        print(execstr)
        return None
예제 #2
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def getParticipation(commInfo, insert=True):
    '''Download the member list of every given committee and optionally store it.

    Args:
        commInfo:   Iterable of committee records; only element 0 (the
                    committee id) of each record is read
        insert:     Whether the rows are written to the "participates" table
    Returns:
        bool: True if every committee was processed, False on any error
    '''
    conn = None
    dbc = None
    try:
        conn = httplib.HTTPSConnection(govhost)
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        for c in commInfo:
            endpoint = ("/api/v2/committee_member?committee=" + str(c[0]) +
                        "&limit=300")
            conn.request("GET", endpoint)
            res = conn.getresponse()
            checkResponse(res, endpoint)

            jd = json.loads(res.read())
            formatted = [(gUID(p[u'person'][u'id'], p[u'committee'][u'id']),
                          p[u'person'][u'id'], p[u'committee'][u'id'],
                          p[u'role']) for p in jd[u'objects']]
            # A committee without members yields no rows; skip the insert
            # instead of treating the empty batch as a database failure.
            if insert and formatted:
                if not dbmngr.insertMany(dbc, "participates",
                                         ["id", "lid", "cid", "role"],
                                         formatted):
                    raise Exception("Database Insertion Error")
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
    finally:
        # Release both handles on the error path as well.
        if dbc is not None:
            dbc.close()
        if conn is not None:
            conn.close()
예제 #3
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def genLikes(contentid, idlist, authorideo=None, ideolist=None):
    '''Randomly pick which of the given ids "like" a piece of content and store it.

    When both authorideo and ideolist are given, the entry at position n of
    idlist likes the content with probability
    0.01 * (4 - abs(ideolist[n] - authorideo)); otherwise every id uses 0.05.

    Args:
        contentid:  Id of the content being liked
        idlist:     Candidate ids
        authorideo: Ideology value of the content's author, or None
        ideolist:   Ideology values parallel to idlist, or None
    Returns:
        list: The ids that liked the content (possibly empty), None on error
    '''
    dbc = None
    try:
        if authorideo is not None and ideolist is not None:
            problist = [0.01 * (4 - abs(i - authorideo)) for i in ideolist]
        else:
            problist = [0.05 for _ in idlist]

        likes = [
            lid for idx, lid in enumerate(idlist)
            if random.random() < problist[idx]
        ]
        likelist = [[
            unicode(
                uuid.uuid3(uuid.NAMESPACE_DNS,
                           str(i) + str(contentid) + 'l')), i, contentid
        ] for i in likes]
        # Nobody liking the content is a normal outcome, not an insert failure.
        if likelist:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "likes", ["id", "lid", "cid"],
                                     likelist):
                raise Exception("Database Insertion Error at genLikes")
        return likes
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        if dbc is not None:
            dbc.close()
예제 #4
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def genReplies(num, iden, replyto):
    '''Generate replies under iden's name and store them in "contents".

    Args:
        num:        Number of replies to generate
        iden:       Author record; element 0 is stored as the author
        replyto:    Id of the content being replied to
    Returns:
        list: The inserted rows [id, time, "reply", text, author, replyto],
              None on error
    '''
    dbc = None
    try:
        # Generate the text with genTweets without inserting it as a post.
        replies = genTweets(num, iden, False)
        if replies is None:
            raise Exception("Tweet generation Error at genReplies")

        collist = ["id", "time", "type", "contents", "author", "replyto"]
        # t[-2] is the second-to-last field of a genTweets row, used here
        # both as the reply text and as the seed of its id.
        contentlist = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, t[-2])),
            unicode(datetime.datetime.now()), u"reply", t[-2], iden[0], replyto
        ] for t in replies]

        # Only touch the database when there is something to insert.
        if contentlist:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "contents", collist, contentlist):
                raise Exception("Database Insertion Error at genReplies")
        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        if dbc is not None:
            dbc.close()
예제 #5
0
파일: dbmngr.py 프로젝트: nanflasted/C-SPAN
def createTable(conn, name, col, foreign=None):
    '''Create a table with the given connection
    Example:
        createTable(conn, "example", {"col1":["INT"], "col2": ["int","primary","not null"]})
    Args:
        conn:   The Connection to the database
        name:   The name of the table
        col:    A dictionary of the columns, with the keys as names, and values as type and
                keywords such as "PRIMARY," "NOT NULL" etc. Values must be iterable
        foreign:A dictionary to indicate which columns are foreign. Keys are the local columns,
                and values are the referenced foreign columns
    Returns:
        bool: Whether the operation was successful
    '''
    execstr = ""
    try:
        # The table name and column specs are spliced in verbatim (identifiers
        # cannot be bound as parameters), so they must come from trusted code.
        defs = [k + " " + " ".join(v) for k, v in col.items()]
        if foreign is not None:
            defs.extend("foreign key(" + k + ") references " + v
                        for k, v in foreign.items())
        execstr = "create table " + name + "(" + ",".join(defs) + ")"
        c = conn.cursor()
        c.execute(execstr)
        conn.commit()
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        print(execstr)
        return False
예제 #6
0
파일: dbmngr.py 프로젝트: nanflasted/C-SPAN
def insertEntry(conn, table, entry):
    '''Insert an entry into the database referred to by the connection
    Example:
        insertEntry(conn,"sampleTable",{"col1":2,"col2":0})
    Args:
        conn:   The connection to the database
        table:  The name of the table
        entry:  A dictionary of the columns that the entry has, and the respective values
    Returns:
        bool: whether the operation was successful
    '''
    execstr = ""
    try:
        cols = list(entry.keys())
        vals = [entry[k] for k in cols]
        # Bind the values instead of quoting them by hand: this accepts
        # non-string values and values containing quote characters.
        execstr = ("insert into " + table + " (" + ",".join(cols) +
                   ") values (" + ",".join(["?"] * len(cols)) + ");")
        c = conn.cursor()
        c.execute(execstr, vals)
        conn.commit()
        return True
    except Exception:
        print(execstr)
        csplog.logexc(sys.exc_info())
        return False
예제 #7
0
파일: nwmngr.py 프로젝트: nanflasted/C-SPAN
def getAllTweets(screen_name, filename):
    try:
        #Twitter only allows access to a users most recent 3240 tweets with this method
        consumer_key, consumer_secret, access_key, access_secret = twcred.auth(
        )
        #authorize twitter, initialize tweepy
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_key, access_secret)
        api = tweepy.API(auth)

        alltweets = []
        i = 0
        for s in screen_name:
            try:
                alltweets.extend(api.user_timeline(screen_name=s, count=200))
                i += 1
                print str(i) + "/" + str(len(screen_name)) + " done"
            except Exception as e:
                print s, e
        outtweets = [tweet.text.encode("utf-8") for tweet in alltweets]
        outtweets = map(removeLinks, outtweets)
        with open(filename, 'w+') as f:
            for t in outtweets:
                f.write(t + '\n')
        return True
    except Exception as e:
        csplog.logexc(sys.exc_info())
        return False
    return False
예제 #8
0
파일: nwmngr.py 프로젝트: nanflasted/C-SPAN
def genTweetBlobs(twaccnts):
    '''Train one text model per ideology bin (0-4) from legislators' tweets.

    For each bin the tweets of the matching accounts are written to
    ./data/twblobs/<bin>/input.txt and ./rnn/train.py is run on that directory.

    Args:
        twaccnts:   Mapping from legislator id to twitter account name
    Returns:
        bool: True if every model was trained, False on any error
    '''
    try:
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id", "ideology"], ["legislators"])
            if idcur is None:
                raise Exception("Query Error at genTweetBlobs")
            idlist = idcur.fetchall()
        finally:
            # The database is only needed for this one lookup; do not keep
            # it open during the long-running training below.
            if dbc is not None:
                dbc.close()
        print("query for id, ideology done")
        for i in range(5):
            acclist = [twaccnts[k[0]] for k in idlist if int(k[1]) == i]
            datadir = './data/twblobs/' + str(i) + '/'
            # Training on a missing or stale input file would be meaningless.
            if not getAllTweets(acclist, datadir + 'input.txt'):
                raise Exception(
                    "Input file generation Error at genTweetBlobs")
            print("input file generation for ideology " + str(i) + " success")
            res = subprocess.call([
                "python", "./rnn/train.py", "--data_dir=" + datadir,
                "--save_dir=" + datadir + 'model/', "--rnn_size=" + str(32),
                "--num_epochs=" + str(1), "--seq_length=" + str(10),
                "--learning_rate=" + str(0.003), "--model=lstm"
            ])
            if res != 0:
                raise Exception(
                    "Training subprocess call Error at genTweetBlobs")
            print("model trained for ideology " + str(i))
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
예제 #9
0
파일: dbmngr.py 프로젝트: nanflasted/C-SPAN
def updateMany(conn, table, cols, vals):
    '''Update certain columns on certain entries in the given table, with the given values.
    Example:
        updateMany(conn, "sampleTable", ["col1", "col2"], [(1, "a", 10), (2, "b", 11)])
    Args:
        conn:   connection to the database
        table:  the table to be targeted for updates
        cols:   the columns to be updated
        vals:   the values to be inserted as the update, format: (value1,value2,...,valuen,id)
    Returns:
        bool:   whether the operation was successful; an empty vals list is a
                successful no-op, a row whose length is not len(cols) + 1 is a failure
    '''
    execstr = ""
    try:
        if not vals:
            # Nothing to update is not an error.
            return True
        # Each row carries one value per column plus the trailing id.
        if len(cols) != len(vals[0]) - 1:
            return False
        execstr = ("update " + table + " set " +
                   ",".join(col + "= ?" for col in cols) + " where id = ?;")
        c = conn.cursor()
        c.executemany(execstr, vals)
        conn.commit()
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        print(execstr)
        return False
예제 #10
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def genMemes(num, iden, background, insert=True, primer=None):
    '''Generate meme captions under iden's name and optionally store them.

    Each caption is a generated tweet in which one randomly chosen word is
    replaced by the marker "<MEME>", separating top text from bottom text.

    Args:
        num:        Number of memes to generate
        iden:       Author record; element 0 is stored as the author
        background: Value stored in the "memebg" column
        insert:     Whether the rows are written to the "contents" table
        primer:     Optional priming text forwarded to genTweets
    Returns:
        list: Rows [id, time, "meme", text, author, background], None on error
    '''
    dbc = None
    try:
        # Generate the raw text with genTweets without inserting it as posts.
        tw = genTweets(num, iden, False, primer)
        if tw is None:
            raise Exception("Tweet generation Error at genMemes")
        for t in tw:
            words = t[-2].split(" ")
            # Pick the pivot by position, keeping at least one word in front
            # and, for tweets of four or more words, two behind. Cutting by
            # index (not by searching for the word's text) guarantees exactly
            # one marker even when the word repeats or occurs inside another
            # word; max(2, ...) keeps very short tweets from failing.
            k = random.choice(range(1, max(2, len(words) - 2)))
            t[-2] = (" ".join(words[:k]) + " <MEME> " +
                     " ".join(words[k + 1:]))
        print("top/bottom text generated, separated by string <MEME>")
        collist = ["id", "time", "type", "contents", "author", "memebg"]
        contentlist = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, t[-2])),
            unicode(datetime.datetime.now()), u'meme', t[-2], iden[0],
            background
        ] for t in tw]
        # Only touch the database when there is something to insert.
        if insert and contentlist:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "contents", collist, contentlist):
                raise Exception("Database Insertion Error at genMemes")
        print("Meme generation completed")
        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        if dbc is not None:
            dbc.close()
예제 #11
0
파일: dbmngr.py 프로젝트: nanflasted/C-SPAN
def insertMany(conn, table, cols, entrylist):
    '''For inserting many rows at the same time
    Args:
        conn:   The connection to the database
        table:  The table to be inserted into
        cols:   List of columns related to the entry
        entrylist:  List of tuples of entry values to be inserted.
    Returns:
        bool:   Whether the operation was successful; an empty entrylist is a
                successful no-op, a row whose length differs from cols is a failure
    '''
    execstr = ""
    try:
        if not entrylist:
            # Nothing to insert is not an error.
            return True
        if len(cols) != len(entrylist[0]):
            return False
        colstr = "(" + ",".join(cols) + ")"
        # One "?" placeholder per column; the values are bound by sqlite.
        qmstr = "(" + ",".join(["?"] * len(cols)) + ")"
        execstr = "insert into " + table + " " + colstr + " values " + qmstr + ";"
        c = conn.cursor()
        c.executemany(execstr, entrylist)
        conn.commit()
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        print(execstr)
        return False
예제 #12
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def genTweets(num, iden, insert=True, prime=None):
    '''Generate tweets under iden's name, according to iden's ideology.

    The text is sampled by running ./rnn/sample.py against the model stored
    under ./data/twblobs/<ideology>/model/.

    Args:
        num:    Number of tweets to generate
        iden:   Author record; element 0 is the legislator id
        insert: Whether the rows are written to the "contents" table
        prime:  Optional priming text passed to the sampler via --prime
    Returns:
        list: Rows [id, time, "post", text, author], None on error
    '''
    dbc = None
    try:
        # Look up the author's ideology to pick the matching model.
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        idcur = dbmngr.queryEntry(dbc, ["id", "ideology"], ["legislators"])
        if idcur is None:
            raise Exception("Query Error at genTweets")
        idict = {t[0]: t[1] for t in idcur.fetchall()}
        dbc.close()
        dbc = None
        modeldir = './data/twblobs/' + str(int(idict[iden[0]])) + '/model/'

        # Sample one tweet per subprocess call.
        gentweets = []
        for _ in range(num):
            numwords = random.choice(range(15, 30))
            params = [
                "python", "./rnn/sample.py", "--save_dir", modeldir, "-n",
                str(numwords), "--sample",
                str(1)
            ]
            if prime is not None:
                params += ["--prime", prime]
            # The second line of the sampler's output is taken as the text.
            gentweets.append(subprocess.check_output(params).split("\n")[1])
        print(("tweets" if insert else "reply") +
              " generation from model complete")

        collist = ["id", "time", "type", "contents", "author"]
        contentlist = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, t)),
            unicode(datetime.datetime.now()), u'post', t, iden[0]
        ] for t in gentweets]
        # Only touch the database when there is something to insert.
        if insert and contentlist:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "contents", collist, contentlist):
                raise Exception("Database Insertion Error at genTweets")

        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        # Covers whichever connection is still open when an error occurs.
        if dbc is not None:
            dbc.close()
예제 #13
0
파일: nwmngr.py 프로젝트: nanflasted/C-SPAN
def genBillBlobs():
    '''Train the bill-title model by running ./rnn/train.py on ./data/bills/.

    Returns:
        bool: True if the training process exited with status 0, False otherwise
    '''
    datadir = './data/bills/'
    command = [
        "python",
        "./rnn/train.py",
        "--data_dir=" + datadir,
        "--save_dir=" + datadir + 'model/',
        "--rnn_size=64",
        "--num_epochs=3",
        "--seq_length=10",
        "--learning_rate=0.003",
        "--model=lstm",
    ]
    try:
        status = subprocess.call(command)
        if status != 0:
            raise Exception("Training subprocess call Error at genBillBlobs")
        print("model trained for bills")
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
예제 #14
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def genVotes(billid, voters):
    '''Cast a random "yea"/"nay" vote on a bill for every voter and store it.

    Args:
        billid: Id of the bill being voted on
        voters: Iterable of voter ids
    Returns:
        list: Rows [id, voter, billid, "yea"|"nay"], None on error
    '''
    dbc = None
    try:
        collist = ["id", "lid", "cid", "votes"]
        voteres = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS,
                               str(i) + str(billid))), i, billid,
            "yea" if random.choice(range(2)) > 0 else "nay"
        ] for i in voters]
        # Only touch the database when there is something to insert.
        if voteres:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "votes", collist, voteres):
                raise Exception("Database Insertion Error at genVotes")
        return voteres
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        if dbc is not None:
            dbc.close()
예제 #15
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def genBills(num, committee, iden):
    '''Generate bill titles from the bill model and store them in "contents".

    A title is one to five sampled literals joined as
    "A, B and C Act of 2017".

    Args:
        num:        Number of bills to generate
        committee:  Value stored in the "committees" column
        iden:       Author record; element 0 is stored as the author
    Returns:
        list: Rows [id, time, "bill", title, author, committee], None on error
    '''
    dbc = None
    try:
        modeldir = "./data/bills/model/"
        res = []
        for i in range(num):
            # Number of literals in this title, k in [1,5].
            k = random.choice(range(1, 6))
            genlits = []
            for _ in range(k):
                numwords = random.choice(range(1, 4))
                # The second line of the sampler's output is taken as the text.
                genlits.append(
                    subprocess.check_output([
                        "python", "./rnn/sample.py", "--save_dir", modeldir,
                        "-n",
                        str(numwords), "--sample",
                        str(1)
                    ]).split("\n")[1].capitalize())

            # Concatenate the literals into the final title.
            title = genlits[-1] + " Act of 2017"
            if k > 1:
                title = ", ".join(genlits[:-1]) + " and " + title
            res.append(title)
            print(res)
            print("{0}/{1} bills generated".format(i + 1, num))

        collist = ["id", "time", "type", "contents", "author", "committees"]
        contentlist = [[
            unicode(uuid.uuid3(uuid.NAMESPACE_DNS, r)),
            unicode(datetime.datetime.now()), u'bill', r, iden[0], committee
        ] for r in res]
        # Only touch the database when there is something to insert.
        if contentlist:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            dbc.text_factory = str
            if not dbmngr.insertMany(dbc, "contents", collist, contentlist):
                raise Exception("Database Insertion Error at genBills")
            print("Bills insertion complete")
        return contentlist
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        if dbc is not None:
            dbc.close()
예제 #16
0
def getIdeology():
    '''Get all of the legislators' ideology score
    Args:
        None
    Returns:
        dict:       A dictionary with {id->ideology}, None if operation failed
    '''
    dbc = None
    try:
        dbc = dbmngr.connectDB('./data/', 'cspdb', False)
        idcur = dbmngr.queryEntry(dbc, ["id", "ideology"], ["legislators"])
        # queryEntry reports failure by returning None instead of a cursor.
        if idcur is None:
            raise Exception("Query Error at getIdeology")
        iddict = {row[0]: row[1] for row in idcur.fetchall()}
        csplog.logevent("query", "queried all ideologies")
        return iddict
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        if dbc is not None:
            dbc.close()
예제 #17
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def populate(t, r, insert=True):
    '''Generate t tweets per legislator and r replies to each generated tweet.

    Args:
        t:      Number of tweets per legislator; 0 means nothing to do
        r:      Number of replies per tweet; each batch of replies is
                authored by a randomly chosen legislator
        insert: Currently unused; tweets and replies are always inserted
    Returns:
        bool: True on success, False on any error
    '''
    try:
        if t == 0:
            return True
        # The legislator list is needed for the tweets themselves, not only
        # for the replies, so it is loaded regardless of r.
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        try:
            idcur = dbmngr.queryEntry(dbc, ["id"], ["legislators"])
            if idcur is None:
                raise Exception("Query Error at populate")
            idlist = idcur.fetchall()
        finally:
            if dbc is not None:
                dbc.close()
        for l in idlist:
            tweets = genTweets(t, l, True)
            if tweets is None:
                raise Exception("Tweet generation Error at populate")
            if r == 0:
                continue
            for tw in tweets:
                reper = random.choice(idlist)
                # tw[0] is the id of the tweet being replied to.
                genReplies(r, reper, tw[0])
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
예제 #18
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def updateLegisImg():
    '''Download every legislator's photo from govtrack into the "image" column.

    Legislators whose photo cannot be fetched get the contents of
    ./data/noimg.jpeg instead.

    Returns:
        bool: True if the table was updated, False on any error
    '''
    def nonexist():
        '''handle the situation where person doesn't have an image on govtrack'''
        with open("./data/noimg.jpeg", "rb") as f:
            return f.read()

    def getImg(conn, iden):
        '''Gets the person 'iden's image from govtrack, or the placeholder'''
        endpoint = "/data/photos/" + str(iden[0]) + "-200px.jpeg"
        try:
            conn.request("GET", endpoint)
            res = conn.getresponse()
            # Always drain the body, even for error statuses: the connection
            # is reused, and httplib requires the whole response to be read
            # before the next request can be made.
            body = res.read()
            if res.status == 200:
                return body
            # 404 means this person has no image on govtrack; any other
            # status is likewise answered with the placeholder.
        except Exception:
            csplog.logexc(sys.exc_info())
            # Reset the connection so one failed transfer does not affect the
            # following requests (httplib is expected to reconnect on demand).
            conn.close()
        print(endpoint)
        return nonexist()

    conn = None
    dbc = None
    try:
        conn = httplib.HTTPSConnection(govhost)
        dbc = dbmngr.connectDB("./data/", "cspdb", False)
        idcur = dbmngr.queryEntry(dbc, ["id"], ["legislators"])
        if idcur is None:
            raise Exception("Query Error at updateLegisImg")
        idlist = idcur.fetchall()
        updlist = [(sqlite3.Binary(getImg(conn, p)), p[0]) for p in idlist]
        # Only run the update when there is at least one legislator.
        if updlist and not dbmngr.updateMany(dbc, "legislators", ["image"],
                                             updlist):
            raise Exception("Update Error at updateLegisImg")
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        return False
    finally:
        if conn is not None:
            conn.close()
        if dbc is not None:
            dbc.close()
예제 #19
0
파일: dbmngr.py 프로젝트: nanflasted/C-SPAN
def removeEntry(conn, table, cond, params=None):
    '''Remove all entries within the given table with the given conditions
    Args:
        conn:   The connection to the database
        table:  "From" clause conditions
        cond:   "Where" clause conditions as one SQL string; None removes every entry
        params: Optional sequence of values bound to "?" placeholders in cond
    Returns:
        bool:   Whether the operation was successful
    '''
    execstr = ""
    try:
        # table and cond are spliced in verbatim and must come from trusted
        # code; outside values belong in params.
        execstr = "delete from " + table
        if cond is not None:
            execstr += " where " + cond
        c = conn.cursor()
        c.execute(execstr, () if params is None else params)
        conn.commit()
        return True
    except Exception:
        csplog.logexc(sys.exc_info())
        print(execstr)
        return False
예제 #20
0
파일: dbmngr.py 프로젝트: nanflasted/C-SPAN
def connectDB(directory, name, new=False):
    """Connect to a given database, creates one if it doesn't exist
    Args:
        directory:  The directory of the database, with or without a trailing separator
        name:   The name (or intended name) of the database, without the ".db" suffix
        new:    Whether the method should create a new database; an existing
                file of that name is deleted first
    Returns:
        conn:   The connection to the Database, None if database doesn't exist and the
                instruction is to not create one; or if there was an exception
    """
    try:
        namestr = os.path.join(directory, name + ".db")
        exists = os.path.isfile(namestr)
        if not exists and not new:
            return None
        if exists and new:
            # Start from scratch: drop the old file before reconnecting.
            os.remove(namestr)
        return sqlite3.connect(namestr)
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
예제 #21
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def getCommInfo(insert=True):
    '''Download the current top-level committees from govtrack and optionally store them.

    Args:
        insert: Whether the rows are written to the "committees" table
    Returns:
        list: Tuples (id, name, jurisdiction, committee_type), None on error
    '''
    conn = None
    dbc = None
    try:
        conn = httplib.HTTPSConnection(govhost)
        endpoint = "/api/v2/committee?obsolete=false&committee=null&limit=300"
        conn.request("GET", endpoint)
        res = conn.getresponse()
        # Same response check the other govtrack scrapers in this module run.
        checkResponse(res, endpoint)
        jd = json.loads(res.read())
        formatted = [(c[u'id'], c[u'name'], c[u'jurisdiction'],
                      c[u'committee_type']) for c in jd[u'objects']]
        # The database is only opened (and later closed) when rows are
        # actually going to be inserted.
        if insert and formatted:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            if not dbmngr.insertMany(dbc, "committees",
                                     ["id", "name", "desc", "floor"],
                                     formatted):
                raise Exception("Database Insertion Error")
        return formatted
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        if dbc is not None:
            dbc.close()
        if conn is not None:
            conn.close()
예제 #22
0
파일: dbpopl.py 프로젝트: nanflasted/C-SPAN
def _binIdeologyScores(rows, bins=5):
    '''Replace each row's raw score (column 1) by its bin index in [0, bins).

    Rows are ordered by numeric score; the first bins - 1 bins hold
    len(rows) // bins rows each and the last bin takes the remainder.
    '''
    ordered = sorted(rows, key=lambda row: float(row[1]))
    binsize = len(ordered) // bins
    for idx, row in enumerate(ordered):
        # With fewer rows than bins everything lands in the last bin.
        row[1] = min(idx // binsize, bins - 1) if binsize else bins - 1
    return ordered


def getBasicInfo(insert=True):
    '''Scrape the current legislators and their ideology bins from govtrack.

    Args:
        insert: Whether the rows are written to the "legislators" table
    Returns:
        With insert=True: dict {person id -> twitter id}.
        With insert=False: list of tuples
        (id, "first last", name, role_type, party, state, ideology bin, None).
        None on error.
    '''
    conn = None
    dbc = None

    def fetch(endpoint):
        '''GET endpoint on the shared connection and return the checked body'''
        conn.request("GET", endpoint)
        res = conn.getresponse()
        checkResponse(res, endpoint)
        return res.read()

    try:
        conn = httplib.HTTPSConnection(govhost)
        data = json.loads(fetch(
            "/api/v2/role?"
            "current=true&"
            "role_type__in=senator|representative&"
            "fields=person__firstname,person__lastname,state,"
            "person__twitterid,person__id,person__name,party,role_type&"
            "limit=600"))
        print("scraped basic info from govtrack")

        # Both chambers' sponsorship analyses are comma-separated text; the
        # first and last elements of the newline split are dropped.
        rows = []
        for chamber, label in (("h", "house"), ("s", "senate")):
            raw = fetch("/data/us/" + caucusnum +
                        "/stats/sponsorshipanalysis_" + chamber + ".txt")
            rows.extend(line.split(",") for line in raw.split("\n")[1:-1])
            print("scraped " + label + " ideology")

        # Every row receives a bin, including the last row of each bin.
        ideo = {int(p[0]): p[1] for p in _binIdeologyScores(rows)}
        print("ideology formatted")

        # NOTE(review): a legislator missing from the ideology files raises
        # KeyError here and fails the whole import - confirm that is intended.
        formatted = [
            (p[u'person'][u'id'],
             (p[u'person'][u'firstname'] + " " + p[u'person'][u'lastname']),
             p[u'person'][u'name'], p[u'role_type'], p[u'party'], p[u'state'],
             ideo[p[u'person'][u'id']], None) for p in data[u'objects']
        ]
        if not insert:
            return formatted

        if formatted:
            dbc = dbmngr.connectDB("./data/", "cspdb", False)
            if not dbmngr.insertMany(
                    dbc, "legislators",
                    ["id", "name", "desc", "role", "party", "state",
                     "ideology", "image"], formatted):
                raise Exception("Database Insertion Error")
        return {
            p[u'person'][u'id']: p[u'person'][u'twitterid']
            for p in data[u'objects']
        }
    except Exception:
        csplog.logexc(sys.exc_info())
        return None
    finally:
        if dbc is not None:
            dbc.close()
        if conn is not None:
            conn.close()