Code example #1
0
from pymldb import Connection

# Connect to the MLDB instance on the default HTTP port.
mldb = Connection("http://localhost/")


def _import_csv(procedure_id, url, dataset):
    """Create (and immediately run) an import.text procedure for one CSV."""
    mldb.put(procedure_id, {
        "type": "import.text",
        "params": {
            "dataFileUrl": url,
            "outputDataset": dataset,
            "runOnCreation": True,
        },
    })


# Pull the benchm-ml training (1M rows) and test sets from S3.
_import_csv('/v1/procedures/import_bench_train_1m',
            "https://s3.amazonaws.com/benchm-ml--main/train-1m.csv",
            "bench_train_1m")
_import_csv('/v1/procedures/import_bench_test',
            "https://s3.amazonaws.com/benchm-ml--main/test.csv",
            "bench_test")

mldb.put(
    '/v1/procedures/benchmark', {
        "type": "classifier.experiment",
        "params": {
            "experimentName": "benchm_ml",
            "inputData": """
            select
Code example #2
0
from pymldb import Connection

# Connect to MLDB listening on port 8080.
mldb = Connection("http://localhost:8080")

# Archive containing the pre-trained Inception (Dec 2015) model.
inceptionUrl = 'http://public.mldb.ai/models/inception_dec_2015.zip'

# A fetcher function: downloads a URL and exposes the bytes as [content].
mldb.put('/v1/functions/fetch', {"type": 'fetcher', "params": {}})
print("done")

# Wrap the TensorFlow Inception graph as an MLDB function.  The image is
# fetched by URL and fed to the DecodeJpeg/contents input node; the
# softmax layer is exposed as the function's output.
inception_config = {
    "type": 'tensorflow.graph',
    "params": {
        "modelFileUrl": 'archive+' + inceptionUrl + '#tensorflow_inception_graph.pb',
        "inputs": 'fetch({url})[content] AS "DecodeJpeg/contents"',
        "outputs": "softmax",
    },
}
mldb.put('/v1/functions/inception', inception_config)
print("done")

mldb.put(
    "/v1/procedures/imagenet_labels_importer", {
        "type": "import.text",
        "params": {
            "dataFileUrl": 'archive+' + inceptionUrl +
            '#imagenet_comp_graph_label_strings.txt',
            "outputDataset": {
                "id": "imagenet_labels",
                "type": "sparse.mutable"
            },
Code example #3
0
File: train_matrix.py — Project: MeltingCake/twodate
from pymldb import Connection

mldb = Connection()  # open a connection to the local MLDB instance

# Load the raw CSV.  The empty delimiter/quotechar presumably leave each
# full line in a single column (lineText) for tokenizing below.
raw_config = {
    "type": "text.csv.tabular",
    "params": {
        "dataFileUrl": "file:///mldb_data/sample.csv",
        'delimiter': '',
        'quotechar': '',
    },
}
mldb.put('/v1/datasets/raw_data', raw_config)

# Tokenize each line into a sparse term matrix (one column per token).
sparse_config = {
    "type": "transform",
    "params": {
        "inputData": "select tokenize(lineText,{offset:1, value:1}) as * from raw_data",
        "outputDataset": "sparse_matrix",
        "runOnCreation": True,
    },
}
mldb.put('/v1/procedures/sparse_matrix', sparse_config)

mldb.put('/v1/procedures/svd_matrix', {
        "type" : "svd.train",
        "params" : {
            "trainingData" : """
            SELECT
                COLUMN EXPR (AS columnName() ORDER BY rowCount() DESC, columnName() LIMIT 4000)
            FROM sparse_matrix
            """,
            "columnOutputDataset" : "location_svd_embedding",
            "modelFileUrl" : "file://svd/svd_matrix.svd",
            "functionName": "location_svd_embedder",
Code example #4
0
File: train.py — Project: hexavi42/McHacks
class Candidate_Predictor:
    """Predict per-state favorability for candidates from tweet sentiment.

    Tweet CSVs (one file per candidate, rows of [text, state]) are scored
    against SentiWordNet data hosted in MLDB; a linear model fitted via
    normal equations maps the aggregated sentiment to vote share.
    """

    def __init__(self, port=8080, pool=all_candidates, depth=False):
        """Connect to MLDB, import SentiWordNet, and set the candidate pool.

        port  -- HTTP port of the local MLDB instance.
        depth -- False for unlimited, or an int cap on tweets per candidate.
        """
        # State is created per instance here; the previous class-level
        # lists/dicts were mutable class attributes shared by every
        # instance (classic pitfall).
        self.mldb = Connection(host="http://localhost:{0}".format(port))
        self.candidate_favor = {}
        self.theta = []
        self.set_wordnet()
        self.candidates = pool
        self.depth = depth

    # Tickles SentiWordnet, removing POS data
    def set_wordnet(self):
        """Import SentiWordNet into MLDB and build per-base-word aggregates."""
        # 1) Raw SentiWordNet import.
        self.mldb.put('/v1/procedures/sentiwordneter', {
            "type": "import.sentiwordnet",
            "params": {
                "dataFileUrl": "file:///mldb_data/SentiWordNet_3.0.0_20130122.txt",
                "outputDataset": "sentiwordnet",
                "runOnCreation": True
            }
        })
        # 2) Strip the part-of-speech suffix ("word#pos" -> "word").
        self.mldb.put("/v1/procedures/baseWorder", {
            "type": "transform",
            "params": {
                "inputData": """
                    select *, jseval('
                        return x.split("#")[0];
                    ', 'x', rowName()) as baseWord
                    from sentiwordnet
                """,
                "outputDataset": "senti_clean",
                "runOnCreation": True
            }
        })
        # 3) Aggregate scores per base word.  BUG FIX: this procedure
        # previously reused the id "baseWorder" (copy-paste), clobbering
        # step 2's definition; it now has its own id so both transforms run.
        self.mldb.put("/v1/procedures/baseWorder2", {
            "type": "transform",
            "params": {
                "inputData": """
                       select avg({* EXCLUDING(baseWord)}) as avg,
                              min({* EXCLUDING(baseWord)}) as min,
                              max({* EXCLUDING(baseWord)}) as max,
                       count(*) as cnt
                        NAMED baseWord
                        from senti_clean
                        group by baseWord
                        order by cnt desc
                """,
                "outputDataset": "senti_clean2",
                "runOnCreation": True
            }
        })

    # check sentiment of sentence
    def return_sent(self, sentence):
        """Return the net (positive minus negative) sentiment of *sentence*."""
        # remove quotes because it messes with query
        no_quote = sentence.replace("'", '')
        split = list(set(no_quote.split(' ')))
        join = "','".join(split)
        sent_sent = self.mldb.query("select avg* from senti_clean2 where rowName() in ('{0}')".format(join))
        overall_senti = 0
        if 'avg.NegSenti' in sent_sent.keys():
            for word in sent_sent['avg.NegSenti'].keys():
                # NOTE(review): assumes every word with a NegSenti score
                # also has a PosSenti score -- confirm against the schema.
                overall_senti += sent_sent['avg.PosSenti'][word] - sent_sent['avg.NegSenti'][word]
        return overall_senti

    # run tweet csvs about candidates through sentiment analysis
    def run_candidates(self):
        """Score each candidate's tweets, accumulating sentiment per state."""
        for candidate in self.candidates:
            states = {}
            counter = 0
            # Text mode: Python 3's csv.reader requires str rows ('rb' broke it).
            with open('data/{0}.csv'.format(candidate), 'r') as csvfile:
                spamreader = csv.reader(csvfile, delimiter=',', quotechar='"')
                for row in spamreader:
                    # Stop at the per-candidate cap.  (Was "> depth", an
                    # off-by-one that processed depth+1 tweets.)
                    if self.depth and counter >= self.depth:
                        break
                    if len(row) == 2 and row[1]:
                        state = normalize_state_name(row[1])
                        if state is not None:
                            overall_senti = self.return_sent(row[0])
                            states[state] = states.get(state, 0) + overall_senti
                            counter += 1
            self.candidate_favor[candidate] = states

    # Returns the sentiment value stored in the database
    def get_sentiment_value(self, candidate, state):
        """Return the accumulated sentiment for (candidate, state).

        BUG FIX: the original computed this expression but never returned
        it, so every caller received None.
        """
        # NOTE(review): *candidate* indexes all_candidates here, but
        # predict()/calculate_params() pass values straight from the pool
        # -- confirm which convention callers actually use.
        return self.candidate_favor[all_candidates[candidate]][state]

    # Gets all the results stored in the database
    def get_results(self):
        # Format: [[CANDIDATE_ID, STATE_ID, VOTE_PERCENTAGE], ...]
        # Placeholder: returns None until implemented, so calculate_params
        # (and train) cannot run yet.
        pass

    # Calculates the parameteres using normal equations
    def calculate_params(self):
        """Fit self.theta by normal equations on the stored results."""
        inp = []
        out = []
        results = self.get_results()
        for r in results:
            candidate = r[0]
            state = r[1]
            # TODO: More inputs
            inp.append(self.get_sentiment_value(candidate, state))
            # Row format is [CANDIDATE_ID, STATE_ID, VOTE_PERCENTAGE], so
            # the regression target is index 2 (was r[3], an IndexError).
            out.append(r[2])
        # Normal equations: theta = (X^T X)^-1 X^T y.  BUG FIX: the
        # original used np.multiply (elementwise) where matrix products
        # are required, which raised on the 1-D input.
        x = np.array(inp, dtype=float).reshape(-1, 1)
        y = np.array(out, dtype=float)
        self.theta = np.dot(np.dot(np.linalg.inv(np.dot(x.T, x)), x.T), y)

    # Trains all the sentiment values based on the expected results
    def train(self):
        """Run the full pipeline: sentiment extraction, then regression."""
        self.run_candidates()
        self.calculate_params()

    # Predicts the situation for a given list of candidates for a specific state
    # Returns a map of the percentage each candidate is predicted to have
    def predict(self, state):
        """Return {candidate: predicted vote share} for *state*."""
        results = {}
        for candidate in self.candidates:
            inp = [self.get_sentiment_value(candidate, state)]
            # TODO: WTF
            results[candidate] = np.multiply(self.theta, inp)
        return results

    def save(self, file='sentiment.csv'):
        """Write each candidate's per-state sentiment to a CSV file."""
        with open(file, 'w') as csvfile:
            # list() needed: dict views cannot be concatenated to a list
            # on Python 3 (was a TypeError).
            fieldnames = ['candidate'] + list(state_code.values())
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for candidate in self.candidates:
                self.candidate_favor[candidate]['candidate'] = candidate
                writer.writerow(self.candidate_favor[candidate])