Example #1
    def _recreate_user_timeline(self, consumer_id):
        """
        for when (server restarts, or a new user logs in)
        :return: True on success
        """

        content = (self._relations
                   .select(self._dataset.item_id, self._dataset.timestamp)
                   .join(self._dataset,
                         on=(self._relations.producer_id == self._dataset.producer_id))
                   .where(self._relations.consumer_id == consumer_id)
                   .order_by(self._dataset.timestamp.desc())
                   .limit(self._max_cache)
                   .namedtuples())

        consumer_feed = self.create_cache_name(consumer_id)
        for chunk in chunked(content, 400):
            redis.zadd(consumer_feed,
                       dict((c.item_id, c.timestamp) for c in chunk))

        if not self._include_actor:
            return True

        content = (self._dataset.select(
            self._dataset.item_id, self._dataset.timestamp).where(
                self._dataset.producer_id == consumer_id))

        pipe = redis.pipeline()
        for chunk in chunked(content, 400):
            pipe.zadd(consumer_feed, dict(
                (c.item_id, c.timestamp) for c in chunk))

        pipe.execute()
        return True
Example #2
    def extract(self, gen: Generator):
        """
        Saves the content of the gen to DB. gen is a DomainIntel Object.
        :param gen: DomainIntel generator object
        """
        temp = []
        while True:
            try:
                item = next(gen)
            except StopIteration:
                break
            else:
                if MalwareDomains.select().where(MalwareDomains.domain == item.domain).exists():
                    continue

                if len(temp) == self.CHUNK_SIZE:
                    with db.atomic():
                        for batch in chunked(temp, 200):
                            temp = [{"domain": malware_domain.domain, "type": malware_domain.type,
                                        "reference": malware_domain.reference} for malware_domain in batch]
                            MalwareDomains.insert_many(temp).execute()
                    temp.clear()
                temp.append(item)

        if temp:
            with db.atomic():
                for batch in chunked(temp, 200):
                    temp = [{"domain": malware_domain.domain, "type": malware_domain.type,
                                "reference": malware_domain.reference} for malware_domain in batch]
                    MalwareDomains.insert_many(temp).execute()
Example #3
def db_consume_data(files: List[str]):
    rows   = []
    tables = defaultdict(list)

    db_connect()
    db_drop_tables()
    db_create_tables()

    with profiler(f"Read files: {files}"):
        for f in files:
            def pool_init(pid):
                global PID; PID=pid
            # Ugly reinitialisation of the pool due to PID value propagation
            pool = multiprocessing.Pool(PROC_NR, pool_init, (len(f),))
            rows.extend(filter(None, fd_consume_data(f, pool)))
        for k, v in rows:
            tables[k].append(v)

    with tqdm(total=len(rows), desc="Insert records") as t:
        with profiler("Write to db"):
            for k in tables.keys():
                with profiler(f"    {k}/{len(tables[k])}"):
                    with DB.atomic():
                        for batch in chunked(tables[k], DBBATCH):
                            globals()[k].insert_many(batch).execute()
                            t.update(len(batch))

    db_close()
Example #4
def etl_music_bands(db,
                    *,
                    chunk_size: int = 512,
                    limit: Optional[int] = None) -> int:
    """
    ETL music bands into our databases.

    :param db: The database in which to ETL the bands.
    :param chunk_size: The number of records in each insert batch.
    :param limit: The maximum number of records to fetch from
        wikidata.  This is included for debugging purposes.

    :return: The number of records that were ETLed.
    """

    query = """
    SELECT DISTINCT ?wikidata_id ?name
    WHERE
    {
      ?wikidata_id wdt:P31/wdt:P279* wd:Q2088357.
      ?wikidata_id rdfs:label ?name.
      FILTER (LANG(?name) = 'en')
    }
    """

    if limit:
        query = f"{query}\nLIMIT {limit}"

    bands = query_wikidata(query)
    with db.atomic():
        for batch in chunked(bands, chunk_size):
            MusicBand.insert_many(batch).execute()

    return len(bands)
Example #5
def import_probes(product, probes):
    data = []
    for probe in probes:
        definition = probe.definition
        description = definition["description"]

        data.append({
            "product":
            product,
            "name":
            probe.name,
            "type":
            probe.type,
            "description":
            description,
            "definition":
            definition,
            "index":
            fn.to_tsvector("english", " ".join([probe.name, description])),
        })

    with db.atomic():
        for batch in chunked(data, 100):
            (Probes.insert_many(batch).on_conflict(
                conflict_target=[Probes.product, Probes.name],
                update={
                    Probes.description: EXCLUDED.description,
                    Probes.definition: EXCLUDED.definition,
                    Probes.index: EXCLUDED.index,
                },
            ).execute())
    log("Imported {n:,} probes for {product}".format(n=len(probes),
                                                     product=product))
Example #6
def create_sentences(db, sentence_file=const.SOMETHING_LABELS_FILENAME):
    sentences_list = word_processing.read_csv_label_sentences(sentence_file)
    sentence_dicts = [{'sentence': sentence} for sentence in sentences_list]
    with db.atomic():
        for batch in chunked(sentence_dicts, 100):
            SentenceSQL.insert_many(batch).execute()
    return SentenceSQL.select()
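The create_sentences() example above is the simplest form of the pattern most snippets on this page share: open one transaction, then insert in fixed-size batches. Below is a minimal, self-contained sketch of that pattern; the Sentence model and the in-memory database are hypothetical, not taken from any example.

from peewee import SqliteDatabase, Model, CharField, chunked

db = SqliteDatabase(":memory:")

class Sentence(Model):
    text = CharField()

    class Meta:
        database = db

db.create_tables([Sentence])
rows = [{"text": "sentence %d" % i} for i in range(1000)]
with db.atomic():                      # one transaction around all batches
    for batch in chunked(rows, 100):   # at most 100 rows per INSERT statement
        Sentence.insert_many(batch).execute()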
Example #7
def parse_petition(petitions):
    for petition in petitions:
        # get_petition_status(petition)
        data = []
        petition_url = 'https://petition.president.gov.ua/petition/' + str(petition)
        pages = [petition_url + '/votes/' + str(i) for i in range(1, max_page(petition_url) + 1)]

        for page in pages:
            html = requests.get(page).text
            soup = BeautifulSoup(html, 'lxml')
            rows = soup.find_all('div', class_=re.compile(r'^table_row$'))

            for r in rows:
                position_number = r.find('div', class_='table_cell number').string.replace('.', '')
                username = r.find('div', class_='table_cell name').string
                day, month, year = r.find('div', class_='table_cell date').string.split(' ')
                new_month = m.parse(month)[0].inflect({'nomn'}).word.title()
                sign_date = datetime.strptime(' '.join([day, new_month, year]), '%d %B %Y')

                data.append((petition, position_number, username, sign_date))

        if Vote.select().where(Vote.petition == petition).exists():
            print(f'petition {petition} was in db with',
                  Vote.delete().where(Vote.petition == petition).execute(), 'rows')
        with db.atomic():
            # by default SQLite limits the number of bound variables in a SQL query to 999
            for batch in chunked(data, 249):
                Vote.insert_many(batch, fields=['petition', 'position_number', 'username', 'sign_date']).execute()

        status = get_petition_status(petition)
        Petition.update(status=status).where(Petition.petition_id == petition).execute()
    if petitions:
        set_gender()
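The chunk size of 249 used above comes from SQLite's default limit of 999 bound variables per statement: each Vote row binds four fields, and 249 * 4 = 996 stays under the limit. A small illustrative sketch of how a safe chunk size could be derived for any field count (the helper name is made up, not part of any example):

SQLITE_MAX_VARIABLE_NUMBER = 999  # SQLite's historical default; newer builds allow more

def safe_chunk_size(fields, limit=SQLITE_MAX_VARIABLE_NUMBER):
    # Each inserted row binds one variable per field, so divide the
    # variable limit by the field count to get the largest safe batch.
    return max(1, limit // len(fields))

# For the four Vote fields used above:
# safe_chunk_size(['petition', 'position_number', 'username', 'sign_date'])  -> 249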
Example #8
        def save_all(objs: List["DbBarData"], progress_bar_dict=None):
            """
            save a list of objects, update if exists.
            """
            dicts = [i.to_dict() for i in objs]
            progress_bar_dict = progress_bar_dict or {}  # allow calling without a progress bar
            with db.atomic():
                if driver is Driver.POSTGRESQL:
                    for bar in dicts:
                        DbBarData.insert(bar).on_conflict(
                            update=bar,
                            conflict_target=(
                                DbBarData.symbol,
                                DbBarData.exchange,
                                DbBarData.interval,
                                DbBarData.datetime,
                            ),
                        ).execute()
                else:
                    total_sz = len(dicts)
                    loaded = 0
                    for c in chunked(dicts, 50):
                        DbBarData.insert_many(c).on_conflict_replace().execute()

                        if 'save_progress_bar' in progress_bar_dict:
                            loaded += 50
                            percent_saved = min(round(100 * loaded / total_sz, 2), 100)
                            QApplication.processEvents()
                            progress_bar_dict['save_progress_bar'].setValue(percent_saved)

                        elif 'web_progress' in progress_bar_dict:
                            loaded += 50
                            percent_saved = min(round(100 * loaded / total_sz, 2), 100)
                            progress_bar_dict['web_progress'].write_message({'progress': percent_saved})
                            print(f"web progress: {percent_saved}")
Example #9
def bootstrap_tasks(config):
    for (name, tasks) in config["tasks"].items():
        TaskGroup.create(name=name)
        with db.atomic():
            for batch in chunked(tasks, 100):
                Task.insert_many(
                    [{"path": task, "group": name} for task in batch]
                ).execute()
Example #10
 def insert_many_safe(cls, models, batch_size=100):
     with cls._meta.database.atomic() as transaction:
         try:
             for batch in chunked(models, batch_size):
                 cls.insert_many(batch).execute()
         except PeeweeException as pwex:
             transaction.rollback()
             print(pwex)
Example #11
 def batch_insert(cls, dict_list, nums=1000):
     """批量创建"""
     if not dict_list:
         return
     for batch in chunked(dict_list, nums):
         cls.insert_many(batch).execute()
     print('insert {} rows:{}'.format(cls.__name__.lower(), len(dict_list)))
     return
Example #12
 def save_all(objs: List["DbBarData"]):
     """
     save a list of objects, update if exists.
     """
     dicts = [i.to_dict() for i in objs]
     with db.atomic():
         for c in chunked(dicts, 60):
             DbBarData.insert_many(c).on_conflict_replace().execute()
Example #13
def bootstrap_tasks(config):
    for (name, tasks) in config["tasks"].items():
        TaskGroup.insert(name=name).on_conflict("ignore").execute()
        with db.atomic():
            for batch in chunked(tasks, 100):
                query = Task.insert_many(
                    [{"path": task, "group": name} for task in batch]
                ).on_conflict("ignore")
                query.execute()
Example #14
 def insert_data(self, data, table_class):
     """
     Insert data into database in chunks using peewee functions
      :param table_class: Class which should store the given data in the database
     :param data: List of persons to be stored in database
     """
     with self.db.atomic():
         for batch in chunked(data, 5):
             table_class.insert_many(batch).execute()
Example #15
def sqlite_format(items, filepath):
    db = SqliteDatabase(filepath.as_posix())
    db_proxy.initialize(db)
    db.create_tables((Hotel, ))

    with db.atomic():
        Hotel.delete().execute()
        for batch in chunked(items, 900):
            Hotel.insert_many(batch).execute()
Example #16
def _generate_work(result):
    """
    Fetch work in chunks, loop
    and yield one at a time for
    display.
    """
    for work_set in chunked(result, 100):
        for work in work_set:
            yield work
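Note that chunked() is not tied to query results: it accepts any iterable and yields lists of at most n items, which is what makes the paging above work. A tiny illustrative sketch:

from peewee import chunked

for page in chunked(range(7), 3):
    print(page)
# [0, 1, 2]
# [3, 4, 5]
# [6]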
Example #17
def _save_selenium_redis(entries: typing.Union[Link, typing.List[Link]], single: bool = False,
                         score: typing.Optional[float] = None, nx: bool = False, xx: bool = False) -> None:
    """Save link to the :mod:`selenium` database.

    The function updates the ``queue_selenium`` database.

    Args:
        entries: Links to be added to the :mod:`selenium` database.
            It can be either an *iterable* of links, or a single
            link string (if ``single`` is set to :data:`True`).
        single: Indicate if ``entries`` is an *iterable* of links
            or a single link string.
        score: Score for the Redis sorted set.
        nx: Forces ``ZADD`` to only create new elements and not to
            update scores for elements that already exist.
        xx: Forces ``ZADD`` to only update scores of elements that
            already exist. New elements will not be added.

    When ``entries`` is a list of :class:`~darc.link.Link` instances,
    we try to perform a *bulk* update to ease memory consumption.
    The *bulk* size is defined by :data:`~darc.db.BULK_SIZE`.

    Notes:
        The ``entries`` will be dumped through :mod:`pickle` so that
        :mod:`darc` does not need to parse them again.

    """
    if not entries:
        return
    if score is None:
        score = time.time()

    if not single:
        if typing.TYPE_CHECKING:
            entries = typing.cast(typing.List[Link], entries)

        for chunk in peewee.chunked(entries, BULK_SIZE):
            pool = list(filter(lambda link: isinstance(link, Link), chunk))
            for link in pool:
                _redis_command('set', link.name, pickle.dumps(link), nx=True)
            mapping = {
                link.name: score for link in pool
            }
            with _redis_get_lock('lock_queue_selenium'):  # type: ignore
                _redis_command('zadd', 'queue_selenium', mapping, nx=nx, xx=xx)
        return

    if typing.TYPE_CHECKING:
        entries = typing.cast(Link, entries)

    _redis_command('set', entries.name, pickle.dumps(entries), nx=True)
    mapping = {
        entries.name: score,
    }
    with _redis_get_lock('lock_queue_selenium'):  # type: ignore
        _redis_command('zadd', 'queue_selenium', mapping, nx=nx, xx=xx)
Example #18
def dodaj_dane(dane):
    for model, plik in dane.items():
        pola = [pole for pole in model._meta.fields]
        pola.pop(0)
        wpisy = dane_z_pliku(plik + '.txt', ';')
        print(wpisy)
        with baza.atomic():
            for batch in chunked(wpisy, 100):
                model.insert_many(batch, fields=pola).execute()
                # baza.atomic() commits when the block exits, so no explicit commit is needed
Example #19
def _save_requests_redis(entries: 'Union[Link, List[Link]]',
                         single: bool = False,
                         score: 'Optional[float]' = None,
                         nx: bool = False,
                         xx: bool = False) -> None:
    """Save link to the :mod:`requests` database.

    The function updates the ``queue_requests`` database.

    Args:
        entries: Links to be added to the :mod:`requests` database.
            It can be either a :obj:`list` of links, or a single
            link string (if ``single`` is set to :data:`True`).
        single: Indicate if ``entries`` is a :obj:`list` of links
            or a single link string.
        score: Score for the Redis sorted set.
        nx: Forces ``ZADD`` to only create new elements and not to
            update scores for elements that already exist.
        xx: Forces ``ZADD`` to only update scores of elements that
            already exist. New elements will not be added.

    """
    if score is None:
        score = time.time()

    if not single:
        if TYPE_CHECKING:
            entries = cast('List[Link]', entries)

        for chunk in peewee.chunked(entries, BULK_SIZE):
            pool = list(filter(lambda link: isinstance(link, Link),
                               chunk))  # type: List[Link]
            for link in pool:
                _redis_command('set', link.name, pickle.dumps(link), nx=True)
            with _redis_get_lock('queue_requests'):
                _redis_command('zadd',
                               'queue_requests',
                               {link.name: score
                                for link in pool},
                               nx=nx,
                               xx=xx)
        return None

    if TYPE_CHECKING:
        entries = cast('Link', entries)

    _redis_command('set', entries.name, pickle.dumps(entries), nx=True)
    with _redis_get_lock('queue_requests'):
        _redis_command('zadd',
                       'queue_requests', {
                           entries.name: score,
                       },
                       nx=nx,
                       xx=xx)
    return None
Example #20
 def reload_default(cls):
     if not KyogreDB._db:
         return
     try:
         cls.delete().execute()
      except Exception:
         pass
     with open('data/pkmn_data.json', 'r') as f:
         pkmn_data = json.load(f)
     with KyogreDB._db.atomic():
         for chunk in chunked(pkmn_data, 50):
             cls.insert_many(chunk).execute()
Example #21
    def save_bar_data(self, bars: List[BarData]) -> bool:
        """"""
        # Store key parameters
        bar = bars[0]
        symbol = bar.symbol
        exchange = bar.exchange
        interval = bar.interval

        # Convert bar object to dict and adjust timezone
        data = []

        for bar in bars:
            bar.datetime = convert_tz(bar.datetime)

            d = bar.__dict__
            d["exchange"] = d["exchange"].value
            d["interval"] = d["interval"].value
            d.pop("gateway_name")
            d.pop("vt_symbol")
            data.append(d)

        # Upsert data into database
        with self.db.atomic():
            for c in chunked(data, 50):
                DbBarData.insert_many(c).on_conflict_replace().execute()

        # Update bar overview
        overview: DbBarOverview = DbBarOverview.get_or_none(
            DbBarOverview.symbol == symbol,
            DbBarOverview.exchange == exchange.value,
            DbBarOverview.interval == interval.value,
        )

        if not overview:
            overview = DbBarOverview()
            overview.symbol = symbol
            overview.exchange = exchange.value
            overview.interval = interval.value
            overview.start = bars[0].datetime
            overview.end = bars[-1].datetime
            overview.count = len(bars)
        else:
            overview.start = min(bars[0].datetime, overview.start)
            overview.end = max(bars[-1].datetime, overview.end)

            s: ModelSelect = DbBarData.select().where(
                (DbBarData.symbol == symbol)
                & (DbBarData.exchange == exchange.value)
                & (DbBarData.interval == interval.value)
            )
            overview.count = s.count()

        overview.save()
Example #22
def viewscsvtosql(path, datatype=1):
    # Read the data from the CSV file
    df = pd.read_csv(path)
    df['time'] = pd.to_datetime(df['time'])
    df = df.fillna('')  # fill NA values

    # Iterate over the rows and build the records
    news_data = []
    for index, row in df.iterrows():
        tmp = {}
        tmp['viewid'] = row['id']
        tmp['personname'] = row['person_name']
        if row['country'] == 'N':
            tmp['country'] = ""
        else:
            tmp['country'] = row['country']
        tmp['orgname'] = row['org_name']
        tmp['pos'] = row['pos']
        tmp['verb'] = row['verb']
        tmp['viewpoint'] = row['viewpoint']
        tmp['newsid'] = row['news_id']
        tmp['sentiment'] = row['sentiment']
        tmp['original_text'] = row['original_text']
        tmp['time'] = datetime.strftime(row['time'], '%Y-%m-%d %H:%M:%S')    # format the time string

        news_data.append(tmp)

    # write data to mysql
    mysql_db.connect()

    # Insert the news data
    if not ViewsInfo.table_exists():  # create the table if it does not exist
        ViewsInfo.create_table()
    # else:  # commented out once the bug was fixed; switched to incremental updates
    #     ViewsInfo.delete().execute()  # previously the table was emptied before every refresh
    '''
    # Old approach: insert in batches by manual slicing
    slice_size = 300    # slice size
    nslices = math.floor(len(news_data) / slice_size)
    for i in range(0, nslices):
        with mysql_db.atomic():
            ViewsInfo.insert_many(news_data[i * slice_size: (i + 1) * slice_size]).execute()  # bulk insert

    # Insert the data of the last slice
    with mysql_db.atomic():
        ViewsInfo.insert_many(news_data[nslices * slice_size:]).execute()  # bulk insert
    '''

    with mysql_db.atomic():
        for batch in chunked(news_data, 300):  # 300 rows at a time
            ViewsInfo.insert_many(batch).on_conflict_ignore().execute()  # bulk insert; ignore rows whose primary key already exists
    mysql_db.close()
    return
Example #23
def resolve_vods_by_canon_ids(ids):
    vods = []
    for batch in peewee.chunked(ids, 100):
        results = list(
            PerfVODModel.select().where(PerfVODModel.canon_id << batch))
        if results:
            vmap = {v.canon_id: v for v in results}
        else:
            vmap = {}
        for canon_id in batch:
            vods.append(vmap.get(canon_id))
    return [PerfVOD.from_database_model(v) if v else None for v in vods]
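A note on the canon_id << batch expression above: << is peewee's shorthand for an SQL IN clause, so the two filters below are equivalent (shown with the same model purely for illustration):

PerfVODModel.select().where(PerfVODModel.canon_id << batch)
PerfVODModel.select().where(PerfVODModel.canon_id.in_(batch))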
Example #24
def load_status_updates(filename, status_collection):
    '''
    Opens a CSV file with status data and
    adds it to an existing instance of
    UserStatusCollection

    Requirements:
    - If a status_id already exists, it
    will ignore it and continue to the
    next.
    - Returns False if there are any errors
    (such as empty fields in the source CSV file)
    - Otherwise, it returns True.
    '''
    logger.debug('Entering load_status_updates(%s, %s)', filename,
                 status_collection)
    db_fields = ('status_id', 'user_id', 'status_text')
    status_data = []
    status_ids = set()
    if not filename.endswith('.csv'):
        logger.error('UserStatus files must be in csv format')
        return False
    with open(filename, newline='') as statusfile:
        logger.info('Saving statuses to %s', filename)
        data = csv.reader(statusfile)
        try:
            next(data)  # Eat header data
        except StopIteration:
            logger.error('%s does not contain data', filename)
            return False
        for row in data:
            # There should be three parameters
            if len(row) != 3:
                logger.error('UserStatus needs three parameters')
                return False
            # Empty strings, 0, and None are all considered empty fields
            for item in row:
                if not item:
                    logger.error('UserStatus can not have empty fields')
                    return False
            # Filter for duplicate status_ids
            if row[0] not in status_ids:
                # Should only be called with three non-empty parameters
                logger.debug('Adding %s to collection', row)
                status_data.append(row)
                status_ids.add(row[0])
    logger.info('Saving collection in database')
    Status = status_collection.table  # pylint: disable=C0103
    with status_collection.database.transaction():
        for batch in chunked(status_data, 100):
            Status.insert_many(batch, fields=db_fields).execute()
    return True
Example #25
 def update_category_videos(self, cid):
     _channels_by_cat_id = self.api_request(self.base_url,
                                            {"get_videos_by_cat_id": cid})
     videos = []
     streams = []
     for c in _channels_by_cat_id:
         videos.append({
             "_id":
             c.get("id"),
             "cid":
             c.get("cid"),
             "title":
             c.get("video_title"),
             "thumbnail":
             self.image_url(c.get("video_thumbnail_b")),
         })
         for s in c.get("stream_list"):
             streams.append({
                 "_id": s.get("vod_stream_id"),
                 "channel_id": c.get("id"),
                 "name": s.get("name"),
                 "stream_url": s.get("stream_url"),
                 "token": s.get("token"),
                 "agent": s.get("agent"),
                 "referer": s.get("referer_vod"),
             })
     with db.atomic():
         if len(streams) > 2:
             """ Data fetch successful delete old data """
             cat_channels = [
                 v._id for v in Video.select().where(Video.cid == cid)
             ]
             VodStream.delete().where(
                 VodStream.channel_id.in_(cat_channels)).execute()
             Video.delete().where(Video.cid == cid).execute()
         for batch in pw.chunked(videos, 100):
             Video.replace_many(batch).execute()
         for batch in pw.chunked(streams, 100):
             VodStream.replace_many(batch).execute()
Example #26
 def update_category_channels(self, cid):
     _channels_by_cat_id = self.api_request(self.base_url,
                                            {"get_channels_by_cat_id": cid})
     channels = []
     streams = []
     for c in _channels_by_cat_id:
         channels.append({
             "_id":
             c.get("id"),
             "cid":
             c.get("cid"),
             "title":
             c.get("channel_title"),
             "thumbnail":
             self.image_url(c.get("channel_thumbnail")),
         })
         for s in c.get("stream_list"):
             streams.append({
                 "_id": s.get("stream_id"),
                 "channel_id": c.get("id"),
                 "name": s.get("name"),
                 "stream_url": s.get("stream_url"),
                 "token": s.get("token"),
                 "agent": s.get("agent"),
                 "referer": s.get("referer"),
             })
     with db.atomic():
         if len(streams) > 2:
             """ Data fetch successful delete old data """
             cat_channels = [
                 c._id for c in Channel.select().where(Channel.cid == cid)
             ]
             Stream.delete().where(
                 Stream.channel_id.in_(cat_channels)).execute()
             Channel.delete().where(Channel.cid == cid).execute()
         for batch in pw.chunked(channels, 100):
             Channel.replace_many(batch).execute()
         for batch in pw.chunked(streams, 100):
             Stream.replace_many(batch).execute()
Example #27
def move_df_to_db(imported_data:pd.DataFrame,future_download:bool):
    print("move_df_to_db 函数")

    bars = []
    count = 0
    time_consuming_start = time()
    tmpsymbol = None
    start_time = None

    for row in imported_data.itertuples():
        bar = BarData(

            datetime=datetime.fromtimestamp(row.datetime),   # standard datetime, not datetime64[ns] or a timestamp
            symbol=row.symbol,
            exchange=row.exchange,
            interval=row.interval,

            open_price=row.open_price,
            high_price=row.high_price,
            low_price=row.low_price,
            close_price=row.close_price,

            # open_interest=row.open_interest,
            volume=row.volume,
            gateway_name="DB",

        )
        if not tmpsymbol :
            tmpsymbol = bar.symbol

        if future_download:
            # night session (21:00 - 02:30): shift the date back one day
            if bar.datetime.time() >= dtime(21,0) or bar.datetime.time() <= dtime(2,30):
                bar.datetime -= timedelta(days=1)
            # for other times, subtract one minute ???
            bar.datetime -= timedelta(minutes=1)

        if not start_time:
            start_time = bar.datetime

        bars.append(bar)
        # do some statistics
        count += 1
    end_time = bar.datetime

    # insert into database
    for bar_data in chunked(bars, 10000):  # save the data in batches
        database_manager.save_bar_data(bar_data)

    time_consuming_end = time()
    print(f'Loaded 通达信 (TDX) minute data for {tmpsymbol}: start {start_time}, end {end_time}, rows {count}, elapsed {round(time_consuming_end - time_consuming_start, 3)}s')
Example #28
def populate(experiments, experiment_teams):
    """
    Populates the database with experiment data.

    Args:
        experiments (list[dict]): A list of dictionaries containing information on experiments.
        experiment_teams (list[exp_db_populator.data_types.ExperimentTeamData]): A list containing the users for all new experiments.
    """
    if not experiments or not experiment_teams:
        raise KeyError("Experiment without team or vice versa")

    for batch in chunked(experiments, 100):
        Experiment.insert_many(batch).on_conflict_replace().execute()

    teams_update = [{
        Experimentteams.experimentid: exp_team.rb_number,
        Experimentteams.roleid: exp_team.role_id,
        Experimentteams.startdate: exp_team.start_date,
        Experimentteams.userid: exp_team.user.user_id
    } for exp_team in experiment_teams]

    for batch in chunked(teams_update, 100):
        Experimentteams.insert_many(batch).on_conflict_ignore().execute()
Example #29
    def bulk_insert(cls, data: Iterable[dict]) -> int:
        """
        BaseModel.bulk_insert(data: Iterable[dict]) -> int
        Performs an atomic bulk insertion of the rows provided in ``data`` and returns the number of rows inserted
        :param data: sequence of dictionary objects mapping field names to values to insert
        :return int: The number of rows inserted
        :raise peewee.PeeweeException: if bulk insertion fails
        """
        rows_inserted = 0
        with cls._meta.database.atomic() as transaction:
            for batch in chunked(data, 100):
                rows_inserted += cls.insert_many(batch).execute()

        return rows_inserted
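Hypothetical usage of the helper above, assuming a concrete Person model with a name field that inherits from this base class:

people = ({"name": name} for name in ["Ada", "Grace", "Linus"])
Person.bulk_insert(people)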
Example #30
def copy_db():
    # Name_2, Petition_2, User
    # db.drop_tables([Peticia], safe=True)
    # db.create_tables([Peticia], safe=True)

    # db.create_tables([Name], safe=True)
    # query = Name_2.select()
    # data = []
    # for i in query:
    #     data.append((i.username, i.gender))
    #
    # with db.atomic():
    #     # by default SQLite limits the number of bound variables in a SQL query to 999
    #     for batch in chunked(data, 450):
    #         Name.insert_many(batch, fields=[Name.username, Name.gender]).execute()
    #
    #
    #
    # db.create_tables([Petition], safe=True)
    # query = Petition_2.select()
    # for i in query:
    #     Petition.create(
    #         petition_id=i.petition_id,
    #         status=i.status,
    #         title=i.title,
    #         article=i.article,
    #         answer=i.answer
    #     )
    #
    #
    #
    db.create_tables([Vote], safe=True)
    for i in User.select(User.petition_id).distinct():
        data = []
        # p = Petition.get(petition_id=i.petition_id).petition_id
        p = i.petition_id
        query = User.select().where(User.petition == i.petition_id)

        for user in query:
            data.append((p, user.position_number, user.username,
                         user.sign_date, user.gender))

        with db.atomic():
            # by default SQLite limits the number of bound variables in a SQL query to 999
            for batch in chunked(data, 198):
                # User.insert_many(batch, fields=[User.petition, User.position_number,
                #                                 User.username, User.sign_date, User.gender]).execute()
                Vote.insert_many(batch).execute()