def test_error1(self, conn):
    try:
        r.error('msg').run(conn)
    except RqlRuntimeError as err:
        rql_err = err
        self.assertEqual('msg', err.message)
    assert isinstance(rql_err, RqlRuntimeError)
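# A self-contained variant of the same check, as a minimal sketch assuming a
# RethinkDB server on localhost:28015 and the classic `import rethinkdb as r`
# driver; newer drivers raise the subclass ReqlUserError for r.error(), which
# RqlRuntimeError still catches.
import rethinkdb as r
from rethinkdb.errors import RqlRuntimeError

conn = r.connect(host='localhost', port=28015)
try:
    r.error('msg').run(conn)
except RqlRuntimeError as err:
    # The message passed to r.error() comes back on the client-side exception.
    assert 'msg' in str(err)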
def to_branch(self, test, error_msg):
    '''Turns a normal test into an ugly but helpful branch/error test'''
    return lambda v: r.branch(
        test(v),
        True,
        r.error(self.path + ' ' + error_msg),
    )
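# Hypothetical usage sketch for the wrapper above: the `validator` instance,
# the `users` table and the `age` field are assumptions for illustration. A
# passing document maps to True; a failing one aborts the query with the
# validator's path prepended to the message.
check_adult = validator.to_branch(lambda v: v['age'] >= 18, 'age must be at least 18')
r.table('users').map(check_adult).run(conn)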
def mount(self):
    if self.mounted:
        return
    query = self._ensure_table_query(self.db_query, self.files_table_name)
    file_index_func = lambda row: rethinkdb.args([
        row[STATUS_JSON_NAME],
        row[FILE_NAME_JSON_NAME],
        row[FINISHED_DATE_JSON_NAME],
    ])
    file_prefix_index_func = lambda row: rethinkdb.expr(
        row[STATUS_JSON_NAME] == "completed").branch(
            rethinkdb.args([
                row[FILE_NAME_JSON_NAME].split("/").slice(1, -1),
                row[FINISHED_DATE_JSON_NAME],
            ]),
            rethinkdb.error("File is still uploading."))
    query = query.do(lambda result: rethinkdb.expr(
        result["tables_created"] == 1).branch(
            self._create_index(
                self.db_query, self.files_table_name,
                self.file_index, file_index_func
            ).do(lambda _: self._create_index(
                self.db_query, self.files_table_name,
                self.file_prefix_index, file_prefix_index_func)),
            None))
    query = query.do(lambda _: self._ensure_table_query(
        self.db_query, self.chunks_table_name))
    chunk_index_func = lambda row: rethinkdb.args(
        [row[FILE_ID_JSON_NAME], row[NUM_JSON_NAME]])
    query = query.do(lambda result: rethinkdb.expr(
        result["tables_created"] == 1).branch(
            self._create_index(self.db_query, self.chunks_table_name,
                               self.chunk_index, chunk_index_func),
            None))
    query = query.do(lambda _: self._confirm_mount())
    return query
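# The prefix index above leans on a useful property of branch/error inside an
# index function: a document whose index function errors is simply left out of
# the index rather than failing the write. A stripped-down sketch of that idiom,
# with the table name, field names and index name as assumptions:
import rethinkdb

conn = rethinkdb.connect(host='localhost', port=28015)
rethinkdb.table('files').index_create(
    'finished_files',
    lambda row: rethinkdb.branch(
        row['status'] == 'completed',
        [row['filename'], row['finished_at']],
        rethinkdb.error('File is still uploading.'),
    ),
).run(conn)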
def commit(conn, xid, changes):
    result = run_query(
        TX_TBL.get(xid).update(
            rethinkdb.branch(STATUS_ROW.eq('pending'), {
                'status': 'committed',
                'changes': changes
            }, rethinkdb.error('precondition failed'))),
        conn)
    return result['errors'] == 0
def write(conn, xid, table, key, old_xid, document):
    if old_xid is not None:
        result = run_query(
            table.get(key).update(
                rethinkdb.branch(XID_ROW.eq(old_xid), {
                    'xid': xid,
                    'intent': document
                }, rethinkdb.error('write conflict'))),
            conn)
    else:
        result = run_query(
            table.insert({
                'id': key,
                'xid': xid,
                'intent': document
            }, conflict='error'),
            conn)
    if result['errors'] != 0:
        raise exceptions.OptimisticLockFailure(xid)
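# In both helpers above, branch/error turns the update into a compare-and-set:
# when the guard does not match, rethinkdb.error() aborts the write for that
# document and the failure surfaces in the result's 'errors' count. A minimal
# standalone sketch of the idiom, assuming STATUS_ROW and TX_TBL are plain ReQL
# expressions along these lines (the table name and xid are illustrative):
import rethinkdb

TX_TBL = rethinkdb.table('transactions')
STATUS_ROW = rethinkdb.row['status']

conn = rethinkdb.connect(host='localhost', port=28015)
result = TX_TBL.get('some-xid').update(
    rethinkdb.branch(
        STATUS_ROW.eq('pending'),
        {'status': 'committed'},
        rethinkdb.error('precondition failed'),
    )
).run(conn)
committed = result['errors'] == 0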
def go():
    with except_printer():
        r.connect(host="localhost", port="123abc")
    with except_printer():
        r.expr({'err': r.error('bob')}).run(c)
    with except_printer():
        r.expr([1, 2, 3, r.error('bob')]).run(c)
    with except_printer():
        (((r.expr(1) + 1) - 8) * r.error('bob')).run(c)
    with except_printer():
        r.expr([1, 2, 3]).append(r.error('bob')).run(c)
    with except_printer():
        r.expr([1, 2, 3, r.error('bob')])[1:].run(c)
    with except_printer():
        r.expr({'a': r.error('bob')})['a'].run(c)
    with except_printer():
        r.db('test').table('test').filter(
            lambda a: a.contains(r.error('bob'))).run(c)
    with except_printer():
        r.expr(1).do(lambda x: r.error('bob')).run(c)
    with except_printer():
        r.expr(1).do(lambda x: x + r.error('bob')).run(c)
    with except_printer():
        r.branch(
            r.db('test').table('test').get(0)['a'].contains(r.error('bob')),
            r.expr(1), r.expr(2)).run(c)
    with except_printer():
        r.expr([1, 2]).reduce(lambda a, b: a + r.error("bob")).run(c)
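# `c` is an open connection and `except_printer` is not shown above; presumably
# it is a small context manager that prints each error instead of letting it
# abort the demo loop. A plausible definition, purely as an assumption:
from contextlib import contextmanager

@contextmanager
def except_printer():
    try:
        yield
    except Exception as err:
        # Print the exception type and message, then keep going.
        print('%s: %s' % (type(err).__name__, err))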
def append_load(db, collection, vcf_filenames, hide_loading=False, chunk_size=20,
                hard_durability=False, ignore_bad_info=False):
    """Performs the loading operations for a collection that already contains samples."""
    # Check parameters:
    check_parameters(collection, vcf_filenames, chunk_size)

    # Prepare the parameter for db queries:
    durability = 'hard' if hard_durability else 'soft'

    ### CONSISTENCY CHECKS ###
    metadata = r.table('__METADATA__').get(collection).run(db)
    table_list = r.table_list().run(db)

    assert (collection in table_list) == (metadata is not None), \
        "This collection is in a spurious state. Use vcf_admin.py to perform sanity checks."

    if metadata is None:
        print('This is a new collection, switching to direct loading method.')
        return quick_load(db, collection, vcf_filenames, hide_loading=hide_loading,
                          chunk_size=chunk_size, hard_durability=hard_durability,
                          ignore_bad_info=ignore_bad_info)
    else:
        # must check if the collection has finished its pending operations
        assert not metadata.get('doing_init') and not metadata.get('appending_filenames'), \
            "This collection either has still to complete another import operation or has been left in an inconsistent state, aborting. Use vcf_admin to perform consistency checks."
    #########################

    # Load parsers:
    headers, samples, parsers, filestreams = init_parsers(vcf_filenames,
                                                          ignore_bad_info=ignore_bad_info)

    # I want the original filestreams, not the 'fake' ones offered by gzip
    filestreams = [f.fileobj if f.name.endswith('.gz') else f for f in filestreams]

    # check if there are collisions between new samples and the samples already loaded
    new_samples = set([sample for sublist in samples for sample in sublist])
    old_samples = set(metadata['samples'].keys())
    inter_samples = new_samples & old_samples
    if inter_samples:
        print('Some sample names are colliding, aborting.')
        print('Offending names:', ', '.join(inter_samples))
        raise ValueError

    # check if there are collisions between VCF filenames
    old_vcf_filenames = set(metadata['vcfs'].keys())
    inter_vcf_filenames = set(vcf_filenames) & old_vcf_filenames
    if inter_vcf_filenames:
        print('Some VCF filenames are colliding, aborting.')
        print('Offending names:', ', '.join(inter_vcf_filenames))
        print('Tip: you might consider using a more complete pathname to differentiate between files with the same name, eg:')
        print('$ ./vcf_import.py mycollection mytastycows/samples.vcf mytastiercows/samples.vcf')
        raise ValueError

    # Get filesize for every stream, used to print completion percentage and speed.
    total_filesize = float(sum([os.path.getsize(vcf) for vcf in vcf_filenames]))
    total_filesize_as_percentage = total_filesize / 100

    ## UPDATE METADATA ##
    collection_info = {
        'vcfs': {vcf_filenames[i]: headers[i]._asdict() for i in range(len(headers))},
        'samples': {sample: vcf_filenames[i] for i in range(len(headers)) for sample in samples[i]},
        'appending_filenames': vcf_filenames
    }
    r.table('__METADATA__').get(collection).update(r.row.merge(collection_info)).run(db)

    # Timers for completion percentage:
    last_iter = start_time = time.time()

    ## UPDATE ROWS ##
    for multirecord in parsers:
        merged_record = merge_records(multirecord, vcf_filenames, samples)
        result = r.table(collection).get(merged_record['id']).replace(
            r.branch(r.row == None,
                     merged_record,  # new record
                     r.branch(r.row['REF'] == merged_record['REF'],
                              r.row.merge(merged_record),
                              r.error())),
            durability='soft').run(db)

        if result['errors']:
            print("\nFound mismatched REF for CHROM: {} POS: {} when confronting with data already in the database, aborting. All samples in the same collection must share the same reference genome.".format(merged_record['CHROM'], merged_record['POS']))
            raise ValueError

        if not hide_loading:
            pos = sum([f.tell() for f in filestreams])
            print('\rLoading: {0:.2f}%'.format(pos / total_filesize_as_percentage), end=' ')
            now = time.time()
            print('@ {} records/second'.format(int(1 / (now - last_iter))), end=' ')
            print('- ETA: {}'.format(datetime.timedelta(seconds=int((now - start_time) * (total_filesize - pos) / pos))), end=' ')
            sys.stdout.flush()
            last_iter = now

    print('\nCompleted loading, waiting for all inserts to be flushed to disk.')
    # flag insert job as complete once data is written to disk
    r.table(collection).sync().run(db)
    print('OK, updating metadata.')
    r.table('__METADATA__').get(collection).replace(lambda x: x.without('appending_filenames')).run(db)
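# The r.error() call above is what guards the merge: a missing row is inserted,
# a row whose REF matches is merged, and anything else errors so the mismatch
# shows up in the replace result instead of silently overwriting data. A
# stripped-down sketch of just that idiom (the function name, table and fields
# are assumptions for illustration):
import rethinkdb as r

def upsert_if_ref_matches(conn, table_name, record):
    # Insert when absent, merge when REF agrees, otherwise fail this write.
    return r.table(table_name).get(record['id']).replace(
        r.branch(
            r.row == None,
            record,
            r.branch(
                r.row['REF'] == record['REF'],
                r.row.merge(record),
                r.error('REF mismatch'),
            ),
        )
    ).run(conn)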