Code Example #1
File: owner.py  Project: Aayush-Mehta/Basic-DBMS
    def owner_main():
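        # Assumed context, not shown in this snippet: `myc` is mysql.connector,
        # and `a`, `ds`, `dw` are project modules providing auth_main(),
        # data_main() and owner_writer() respectively.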

        mydb = myc.connect(host="", user="", passwd="", database="")
        mycur = mydb.cursor()
        print("Welcome to the owner module")
        user = input("Enter your username")
        passw = input("Enter your password")
        if a.auth_main(user, passw):
            print(
                "Welcome back, dear owner\nPress 1 to check the logs of your current turfs\nPress 2 to add a new "
                "turf to your account\nPress 3 to exit")
            ch = int(input())
            if ch == 1:
                x = ds.data_main(user)
                print(x)
                print(
                    "Enter the name of the turf whose data you want to fetch")
                name = input().lower()
                result = ds.data_main(name)
                for i in result:
                    print(i)
            elif ch == 2:
                name = input(
                    "Enter the name with which you want to make the entry")
                dw.owner_writer(name, user)
            elif ch == 3:
                print("Thanks for using AM's Database Manager")
                exit(1)
        else:
            print("Authentication Failed")
Code Example #2
def classify_to_csv(encoder_type, classifier_type, simple_classifier,
                    extended_classifier, test_tsv, output_file):
    """
    Takes classifiers for each grain of classification and creates a csv identical
        to the test tsv but with predicted classifications and accuracies.
    """
    if encoder_type == "bow" or encoder_type == "lstm":
        simple = "/{0}_simple.pt".format(encoder_type)
        extended = "/{0}_extended.pt".format(encoder_type)
    elif classifier_type == "pooling":
        if args.word_embedding.startswith("fasttext"):
            simple = "/{0}_{1}_simple.pt".format(encoder_type, "fasttext")
            extended = "/{0}_{1}_extended.pt".format(encoder_type, "fasttext")
        else:
            simple = "/{0}_{1}_simple.pt".format(encoder_type, "glove")
            extended = "/{0}_{1}_extended.pt".format(encoder_type, "glove")
    simple_classifier.load_checkpoint(args.saved_models + simple)
    extended_classifier.load_checkpoint(args.saved_models + extended)
    dw = DataWriter(simple_classifier, extended_classifier, test_tsv,
                    output_file)
    dw.write()
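
For reference, a call to classify_to_csv might look like the sketch below; the classifier objects, file paths, and the pre-parsed module-level args are illustrative assumptions, not taken from the original project.

# Hypothetical invocation: assumes bow_simple / bow_extended expose
# load_checkpoint() and that `args` was parsed elsewhere in the module.
classify_to_csv(encoder_type="bow",
                classifier_type="linear",
                simple_classifier=bow_simple,
                extended_classifier=bow_extended,
                test_tsv="data/test.tsv",
                output_file="predictions.csv")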
Code Example #3
File: main.py  Project: shelterz/get_component_cve
                html = cve_getter.get_specific_search_results(
                    ven, product, version, index)
                cve_info = cve_getter.get_cve_info(html)
                index += PAGE_ITEM_MAX
                # We can only get 20 items in one page.
                item_count -= PAGE_ITEM_MAX
                for search_result in cve_info:
                    data_to_write.append(search_result.name)
                    data_to_write.append(search_result.desc)
                    data_to_write.append(search_result.date)
                    data_to_write.append(search_result.cvss)
                    writer.write_excel(data_to_write, has_title)
                    if has_title:
                        has_title = False
                    data_to_write.clear()
        else:
            logger.info(
                'Product %s, version %s not found. Supported versions: %s',
                product, version, versions)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='This is a tool to fetch CVE info from NVD.')
    parser.add_argument('product', help="The product name.")
    parser.add_argument('version', help="The product version.")
    args = parser.parse_args()
    cve_writer = DataWriter()
    get_cve_results(args.product, args.version, cve_writer)
    cve_writer.close()
Code Example #4
                print("... %s of %s : Downloading %s data " %
                      (index, len(rows), company))

                df_data = self.downloadStockData(market_type, code, year1,
                                                 month1, date1, year2, month2,
                                                 date2)
                if df_data is not None:
                    df_data_indexed = df_data.reset_index()
                    self.dbwriter.updatePriceToDB(code, df_data_indexed)

            index += 1
            #return

        self.dbhandler.endTrans()

        print("Done!!!")


if __name__ == "__main__":
    services.register('dbhandler', DataHandler())
    services.register('dbwriter', DataWriter())

    crawler = DataCrawler()
    #html_codes = crawler.downloadCode('2')
    #print(html_codes.__class__)
    #crawler.parseCodeHTML(html_codes, '2')

    #crawler.updateAllCodes()
    crawler.updateAllStockData(2, 2010, 1, 1, 2015, 12, 1, start_index=1)
Code Example #5
def twitter_search(search_method, search_terms_file, search_terms_col,
                   filter_method, num_iterations, testing_mode, flush_db):

    print_args([
        search_method, search_terms_file, search_terms_col, filter_method,
        num_iterations, testing_mode, flush_db
    ], [
        'search_method', 'search_terms_file', 'search_terms_col',
        'filter_method', 'num_iterations', 'testing_mode', 'flush_db'
    ])

    start_time = time.time()

    if testing_mode:
        send_email(
            'Hi there!', 'Running script in testing mode at {}'.format(
                datetime.datetime.now()), ['*****@*****.**'])

    if flush_db:
        # Connect to database
        dw = DataWriter(MONGO_DB_HOST, MONGO_DB_PORT)
        dw.flush(MONGO_DB_NAME)
        print('Database flushed. Stopping script.')
        return

    search_terms = read_search_terms(search_terms_file, search_terms_col)
    if testing_mode:
        # Limit the number of terms
        print('TEST MODE - Limiting the number of terms to 10.')
        search_terms = search_terms[:10]

    found_ids = {term: set() for term in search_terms}
    # TODO: *idea*
    # Avoid adding duplicate tweets.
    # Have collection of all tweets, and a lightweight collection
    # e.g. [{"term": "btc", "query": "$btc", "id": 2837127372}, ...etc]
    since_ids = {}
    for i_iter in range(num_iterations):
        # Connect to database
        dw = DataWriter(MONGO_DB_HOST, MONGO_DB_PORT)
        len_0 = dw.get_collection_len(MONGO_DB_NAME, MONGO_DB_COLLECTION)

        for term in tqdm(search_terms):
            print('Term = {}'.format(term))

            # authorize and load the twitter API
            api = load_api()

            if search_method == 'ticker':
                q = '${}'.format(term)
            elif search_method == 'hashtag':
                q = '#{}'.format(term)
            else:
                q = term

            if filter_method:
                if filter_method == 'crypto':
                    trash_filters = (
                        '-filter:retweets -filter:links '
                        '-gainers -losers -alert -alerts -changes '
                        '-change -changed -increased -decreased')
                else:
                    raise ValueError(
                        'Bad filter_method value: {}'.format(filter_method))
                q += ' {}'.format(trash_filters)

            print('Query = {}'.format(q))

            # Start searching for tweets from 1 day ago
            until_date = (DATE -
                          datetime.timedelta(days=1)).strftime('%Y-%m-%d')

            # Search backwards until since_id is reached
            since_id = get_since_id(term)

            api, tweets, _found_ids, since_id = \
                tweet_search(api, term, q,
                             found_ids=found_ids[term],
                             since_id=since_id,
                             until_date=until_date,
                             local_filestore=JSON_FILE_PATH,
                             dw=dw,
                             testing_mode=testing_mode)

            found_ids[term] = _found_ids
            since_ids[term] = since_id

        print('Finished iteration = {}/{}'.format(i_iter + 1, num_iterations))

        # Get number of new tweets found, and write to file
        num_new_documents = dw.get_collection_len(MONGO_DB_NAME,
                                                  MONGO_DB_COLLECTION) - len_0
        print('Saved {} new tweets'.format(num_new_documents))
        if not os.path.exists('num_iterations_report.log'):
            with open('num_iterations_report.log', 'w') as f:
                f.write('date,iteration,num_new_tweets_found\n')
        with open('num_iterations_report.log', 'a') as f:
            f.write('{},{},{}\n'.format(DATE_STR, i_iter, num_new_documents))

    dump_since_ids(since_ids)
    print('Done pulling data up to {} UTC'.format(DATE_STR))
    end_time = (time.time() - start_time) / 3600
    print('Runtime = {} hours'.format(end_time))
    with open(os.path.join(DIR_PATH, 'runtime.log'), 'a+') as f:
        f.write('{} - {} hours\n'.format(datetime.datetime.now(), end_time))
Code Example #6
def tweet_search(api,
                 term,
                 query,
                 found_ids,
                 until_date='',
                 since_id=0,
                 save_freq=1500,
                 dw=None,
                 local_filestore='',
                 num_iterations=3,
                 testing_mode=False):
    """
    Search through recent tweets matching query, starting from previous ID
    if available or from oldest tweet exposed through the search API.
    """
    if not local_filestore:
        raise ValueError(
            'Please specify local_filestore in tweet_search function')
    if not dw:
        # Connect to database
        dw = DataWriter(MONGO_DB_HOST, MONGO_DB_PORT)

    # Set date to start search (will search backwards from this point)
    if not until_date:
        until_date = (datetime.datetime.utcnow() - datetime.timedelta(days=1))\
                        .strftime('%Y-%m-%d')

    if testing_mode:
        save_freq = 5

    errors = 0
    max_errors = 5
    attempt = 0
    max_attempts = 1
    searched_tweets = []
    _since_id = since_id
    next_since_id = None
    _max_id = None
    # Run the tweet search loop. Starting at until_date and going back
    # until no more tweets are available OR the since_id is reached
    while True:
        try:
            # Twitter API searches backwards, starting at most recent
            # tweets above since_id and lower than max_id. We search
            # from max_id to since_id, updating max_id after each iteration.
            if _max_id:
                new_tweets = api.search(q=query,
                                        count=100,
                                        since_id=str(_since_id),
                                        max_id=str(_max_id),
                                        result_type='recent',
                                        until=until_date,
                                        tweet_mode='extended')
            else:
                new_tweets = api.search(q=query,
                                        count=100,
                                        since_id=str(_since_id),
                                        result_type='recent',
                                        until=until_date,
                                        tweet_mode='extended')
            time.sleep(3)

            for t in new_tweets:
                print(t._json['created_at'], t._json['id'])

            print('Found {} tweets'.format(len(new_tweets)))
            if not new_tweets:
                raise ValueError('No new tweets found')

            new_tweets_json = [t._json for t in new_tweets]
            print('len(new_tweets_json) before ID filter',
                  len(new_tweets_json))

            # Save starting ID to define end point of next run
            if next_since_id is None:
                next_since_id = new_tweets_json[0]['id']

            # Update max ID to push back search threshold
            _max_id = int(new_tweets_json[-1]['id']) - 1
            print('max_id', _max_id)

            # Get IDs
            new_ids = set([t['id'] for t in new_tweets_json])

            # Filter out IDs already found
            new_tweets_json = [
                t for t in new_tweets_json if t['id'] not in found_ids
            ]
            print('len(new_tweets_json) after ID filter', len(new_tweets_json))

            # Update IDs
            found_ids = found_ids.union(new_ids)

            # Add metadata to tweets
            new_tweets_json = [{
                'term': term,
                'q': query,
                'get_date':
                    datetime.datetime.utcnow().strftime(TWITTER_DATE_FORMAT),
                'tweet': t,
            } for t in new_tweets_json]

            # Extend list to be saved
            searched_tweets.extend(new_tweets_json)
            if len(searched_tweets) > save_freq:
                dw.write(searched_tweets,
                         MONGO_DB_NAME,
                         MONGO_DB_COLLECTION,
                         filename=os.path.join(local_filestore,
                                               term_to_filepath(term),
                                               '{}.json'.format(DATE_STR)))
                searched_tweets = []
                if testing_mode:
                    print(
                        'TEST MODE - Tweets saved, returning from tweet_search function'
                    )
                    return api, searched_tweets, found_ids, next_since_id

        except tweepy.TweepError:
            print('Rate limit reached, waiting 15 minutes')
            print('(until: {})'.format(datetime.datetime.now() +
                                       datetime.timedelta(minutes=15)))
            # t0 = time.time()
            dw.write(searched_tweets,
                     MONGO_DB_NAME,
                     MONGO_DB_COLLECTION,
                     filename=os.path.join(local_filestore,
                                           term_to_filepath(term),
                                           '{}.json'.format(DATE_STR)))
            searched_tweets = []
            # time.sleep((15 * 60) - (time.time() - t0))
            time.sleep(15 * 60)
            continue

        except ValueError as e:
            if 'No new tweets found' in str(e):
                attempt += 1
                if attempt > max_attempts:
                    print('No tweets found, stopping search')
                    break
                else:
                    print('No tweets found, trying {} more time(s)'.format(
                        max_attempts - attempt + 1))
                    # Re-load the API and pause briefly before retrying
                    print('Re-loading the twitter API')
                    api = load_api()
                    print('Waiting for a few seconds ...')
                    time.sleep(3)
                    continue
            else:
                action, errors = log_errors(e, errors, max_errors, api, term,
                                            query, until_date, since_id,
                                            save_freq, local_filestore,
                                            num_iterations, testing_mode)
                if action == 'continue':
                    continue
                elif action == 'break':
                    break

        except Exception as e:
            action, errors = log_errors(e, errors, max_errors, api, term,
                                        query, until_date, since_id, save_freq,
                                        local_filestore, num_iterations,
                                        testing_mode)
            if action == 'continue':
                continue
            elif action == 'break':
                break

    dw.write(searched_tweets,
             MONGO_DB_NAME,
             MONGO_DB_COLLECTION,
             filename=os.path.join(local_filestore, term_to_filepath(term),
                                   '{}.json'.format(DATE_STR)))

    return api, searched_tweets, found_ids, next_since_id
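
The comments inside the while loop above describe the pagination contract this function relies on: the search API returns the newest matches first, so each request lowers max_id to just below the oldest tweet already seen, until since_id is reached or a page comes back empty. A minimal sketch of that pattern, with a hypothetical fetch_page callable standing in for the real API:

def paginate_backwards(fetch_page, query, since_id=0):
    """Collect results newest-to-oldest until the source runs dry or
    since_id is reached. `fetch_page` is a hypothetical callable that
    returns a list of dicts with an 'id' key, newest first."""
    collected = []
    max_id = None  # no upper bound on the first request
    while True:
        page = fetch_page(query, since_id=since_id, max_id=max_id)
        if not page:
            break
        collected.extend(page)
        # The next request only asks for items strictly older than the
        # oldest one just seen.
        max_id = page[-1]['id'] - 1
        if max_id <= since_id:
            break
    return collected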
Code Example #7
    def __init__(self,
                 client_id,
                 server_host,
                 server_port,
                 mc_address,
                 mc_port,
                 send_interval=5,
                 chunk_size=10000000,
                 file_size=10000000 * 2,
                 run_time=30,
                 test_path='./',
                 dd_method=False):
        """
        :param client_id: string, unique id for the client
        :param server_host: string, ip address/hostname for sever
        :param server_host: string, port server will listen on
        :param mc_address: string, multicast group address to publish heartbeat
        :param mc_port: int, port for multicast group
        :param send_interval: int, heartbeat send interval
        :param chunk_size: int, in bytes, the size to data size to write to a
        file at a time
        :param file_size: int, maximum file size for the writer
        :param run_time: int, self explanatory, ya know
        :param test_path: string, path to write the data files
        :param dd_method: bool, use dd or not for the file writing
        """

        self.client_id = client_id
        self.server_host = server_host
        self.chunk_size = chunk_size
        self.file_size = file_size
        self.run_time = run_time
        self.mc_group = mc_address, mc_port
        self.send_interval = send_interval
        self.test_path = test_path
        # if dd_method is True, dd will be used
        # if dd_method is False, python file object will be used
        self.dd_method = dd_method
        logging.basicConfig(filename=client_id + '.log',
                            format='%(asctime)s %(levelname)s: %(message)s',
                            level=logging.INFO)

        # Unsure on requirements, going to assume worst case:
        # check that the chunk size and file size allow at least
        # two chunks to be written
        try:
            # check to see if two chunks can be written to the file
            assert self.file_size / self.chunk_size >= 2

        except AssertionError:
            print "Client chunk size is too small for max file size." \
                  "Please reconfigure"
            exit(1)

        # Create the initial TCP connection
        self.tcp = TCPClient((server_host, server_port))
        self.hb1 = Heartbeat(self.mc_group[0], self.mc_group[1],
                             self.client_id, self.send_interval)
        self.kill_sig = Queue(1)
        self.hb_process = Process(target=self.hb1.run, args=(self.kill_sig, ))
        self.hb_process.daemon = True
        self.queue1 = Queue()
        dw1 = DataWriter(self.chunk_size, self.file_size, self.dd_method,
                         self.test_path)
        self.dw_process = Process(target=dw1.run,
                                  args=(self.queue1, self.kill_sig))
        self.dw_process.daemon = True
        self.dw_process_pid = None
Code Example #8
                                           dim=1).detach().cpu().numpy())
    ]
else:
    dev_preds = [
        id2label[x]
        for x in (torch.argmax(nnmodel(dev_data), dim=1).detach().numpy())
    ]

dataloader = Dataloader()

dev_spans = dataloader.read_spans(
    file_name="./datasets/dev-task-TC-template.out")

dev_spans["gold_label"] = dev_preds

datawriter = Datawriter()

datawriter.pred_writer(dev_spans, "./predictions/dev_preds.txt")

if gpu:
    test_preds = [
        id2label[x] for x in (torch.argmax(nnmodel(test_data.cuda()),
                                           dim=1).detach().cpu().numpy())
    ]
else:
    test_preds = [
        id2label[x]
        for x in (torch.argmax(nnmodel(test_data), dim=1).detach().numpy())
    ]

dataloader = Dataloader()
Code Example #9
    "device_id",  # the device we want to locate
    "system_id",  # TANGO has system id 7585, and Pozyx has 115200 (also used for socket port and baudrate)
    "anchor_id",  # anchor used to take the current measure
    "px",  # [px, py, pz] is the position of the device in world coordinate
    "py",
    "pz",
    "theta_x",  # [theta_x, theta_y, theta_z] corresponds to the orientation of the device (radian)
    "theta_y",
    "theta_z",
    "distance",  # distance between anchor and device
    "rssi",  # received signal strength
]

# Initialize the datawriter which will log the received measures in a csv file
datawriter = DataWriter(file_path,
                        header=data_fields,
                        verbose=True,
                        verbose_interval=1)

# ----------- initialize the threads -------------------
threads_list = []
if use_pozyx:
    threads_list.append(
        PozyxAcquisition(usb_port=usb_port, datawriter=datawriter))
if use_tango:
    threads_list.append(
        TangoAcquisition(local_ip=local_ip, datawriter=datawriter))

# start the threads
try:
    # Start threads
    for thread in threads_list:
Code Example #10
config = Config(os.getcwd() + '/config.json')

client = None
if config.console.type == 'serial':
    client = SerialConsole(
        config.console)  # The serial console to acquire data source stream
    client.login(user=config.console.login, password=config.console.password)
elif config.console.type == 'ssh':
    client = SshConsole(config.console)

ver = '0.0.0.0'
if client is None:
    data_reader = DataReaderStub()
else:
    ver = read_fw_version(client)
    data_reader = DataReader(
        client)  # The parsing and aggregation of data into Stats

data_writer = DataWriter(os.getcwd())  # Output Stats data to file
figure = plt.figure('Mem, CPU over time - ' + ver)

du = DataUpdater(data_reader, data_writer, figure)
# The interval timer is restarted after each call to __call__ returns
# So the total time for each iteration is interval + data processing time (sleeps)
anim = FuncAnimation(figure,
                     du,
                     init_func=du.init,
                     interval=config.interval * 1000,
                     blit=True)
plt.show()
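
The comments above explain how DataUpdater drives the plot: FuncAnimation invokes the object once per timer tick, and the interval timer restarts only after __call__ returns. A self-contained sketch of the same callable-object pattern (the names here are illustrative, not from the original project):

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation


class LineUpdater:
    """Callable animation function: __call__ runs once per timer tick."""

    def __init__(self, ax):
        self.xs, self.ys = [], []
        (self.line,) = ax.plot([], [])
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 100)

    def init(self):
        self.line.set_data([], [])
        return (self.line,)

    def __call__(self, frame):
        self.xs.append(frame)
        self.ys.append(frame)
        self.line.set_data(self.xs, self.ys)
        return (self.line,)


fig, ax = plt.subplots()
updater = LineUpdater(ax)
# With blit=True, init_func and the callback must return the artists to redraw.
anim = FuncAnimation(fig, updater, frames=range(100), init_func=updater.init,
                     interval=1000, blit=True, repeat=False)
plt.show()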
Code Example #11
# Splitting into separate groups

# Starting from index 1; position 0 is an empty row
most_played = groups[0].select("tr.app")[1:]
# Starting from 0 - the first row
trending = groups[1].select("tr.app")
# Starting from 0 - the first row
popular = groups[2].select("tr.app")
# Starting from 0 - the first row
hot_releases = groups[3].select("tr.app")

# Scraping -------------------------------------------------------------------------------------------------------------

data_scraper = DataScraper()
data_writer = DataWriter()

# Processing Most Played
most_played_data = []

for game_row in most_played:
    most_played_data.append(data_scraper.get_most_played_data(game_row))

data_writer.write_to_csv(most_played_data, "most_played")
# data_writer.write_to_json(most_played_data, "most_played")

# Processing Trending
trending_data = []

for game in trending:
    trending_data.append(data_scraper.get_trending_data(game))