Example #1
 def test_error1(self, conn):
     rql_err = None  # so the final assert fails cleanly if nothing was raised
     try:
         r.error('msg').run(conn)
     except RqlRuntimeError as err:
         rql_err = err
         assert err.message == 'msg'
     assert isinstance(rql_err, RqlRuntimeError)
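
For comparison, the same check can be written with pytest's exception helper. A minimal sketch, assuming the same conn fixture and that RqlRuntimeError is importable from rethinkdb.errors (the exact location varies by driver version):

import pytest
from rethinkdb.errors import RqlRuntimeError

def test_error_with_pytest(self, conn):
    # pytest.raises both asserts that the exception is thrown and
    # captures it for inspection after the block.
    with pytest.raises(RqlRuntimeError) as excinfo:
        r.error('msg').run(conn)
    assert 'msg' in str(excinfo.value)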
Example #2
 def to_branch(self, test, error_msg):
     '''Turns a normal test into an ugly but helpful branch/error test'''
     return lambda v: r.branch(
         test(v),
         True,
         r.error(self.path + ' ' + error_msg),
     )
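
A hypothetical usage of to_branch: wrapping a plain ReQL predicate so that a failing check raises a server-side error naming the offending path. The validator object, its self.path value of 'user.age', and conn are illustrative assumptions:

# Hypothetical: suppose validator.path == 'user.age'.
is_positive = validator.to_branch(lambda v: v > 0, 'must be positive')

r.expr(5).do(is_positive).run(conn)    # evaluates to True
r.expr(-1).do(is_positive).run(conn)   # raises: "user.age must be positive"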
Example #3
    def mount(self):
        if self.mounted:
            return

        # Files table: ensure it exists, then build its secondary indexes.
        query = self._ensure_table_query(self.db_query, self.files_table_name)
        file_index_func = lambda row: rethinkdb.args([
            row[STATUS_JSON_NAME],
            row[FILE_NAME_JSON_NAME],
            row[FINISHED_DATE_JSON_NAME],
        ])
        # The prefix index only makes sense for completed files;
        # anything else errors out server-side.
        file_prefix_index_func = lambda row: rethinkdb.expr(
            row[STATUS_JSON_NAME] == "completed").branch(
                rethinkdb.args([
                    row[FILE_NAME_JSON_NAME].split("/").slice(1, -1),
                    row[FINISHED_DATE_JSON_NAME],
                ]),
                rethinkdb.error("File is still uploading."))
        # Create the indexes only when the table was just created.
        query = query.do(lambda result: rethinkdb.expr(
            result["tables_created"] == 1).branch(
                self._create_index(
                    self.db_query, self.files_table_name,
                    self.file_index, file_index_func
                ).do(lambda _: self._create_index(
                    self.db_query, self.files_table_name,
                    self.file_prefix_index, file_prefix_index_func)),
                None))

        # Chunks table: same ensure-then-index pattern.
        query = query.do(lambda _: self._ensure_table_query(
            self.db_query, self.chunks_table_name))
        chunk_index_func = lambda row: rethinkdb.args(
            [row[FILE_ID_JSON_NAME], row[NUM_JSON_NAME]])
        query = query.do(lambda result: rethinkdb.expr(
            result["tables_created"] == 1).branch(
                self._create_index(self.db_query, self.chunks_table_name,
                                   self.chunk_index, chunk_index_func),
                None))
        query = query.do(lambda _: self._confirm_mount())
        return query
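
The _create_index helper is not shown in this example. A minimal sketch of what such a helper might look like, assuming it returns an unexecuted query (a guess at the shape, not the project's actual implementation):

def _create_index(self, db_query, table_name, index_name, index_func):
    # Build a query that creates a secondary index, then waits for
    # the index to finish building before continuing.
    return db_query.table(table_name).index_create(
        index_name, index_func).do(
            lambda _: db_query.table(table_name).index_wait(index_name))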
Example #4
def commit(conn, xid, changes):
    result = run_query(
        TX_TBL.get(xid).update(
            rethinkdb.branch(STATUS_ROW.eq('pending'), {
                'status': 'committed',
                'changes': changes
            }, rethinkdb.error('precondition failed'))), conn)
    return result['errors'] == 0
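
TX_TBL and STATUS_ROW are module-level constants the example does not show. Definitions consistent with the usage above might look like this (an assumption, not the project's actual code):

import rethinkdb

TX_TBL = rethinkdb.table('transactions')  # table of transaction records keyed by xid
STATUS_ROW = rethinkdb.row['status']      # 'status' field of the row being updated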
Example #5
def write(conn, xid, table, key, old_xid, document):
    if old_xid is not None:
        result = run_query(
            table.get(key).update(
                rethinkdb.branch(XID_ROW.eq(old_xid), {
                    'xid': xid,
                    'intent': document
                }, rethinkdb.error('write conflict'))), conn)
    else:
        result = run_query(
            table.insert(
                {'id': key, 'xid': xid, 'intent': document},
                conflict='error'),
            conn)
    if result['errors'] != 0:
        raise exceptions.OptimisticLockFailure(xid)
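
Taken together, write and commit sketch a two-phase optimistic transaction: stage an intent under an xid, then flip the transaction record from pending to committed only if nobody else touched it first (XID_ROW is presumably rethinkdb.row['xid'], analogous to STATUS_ROW in the previous example). A hypothetical end-to-end flow, where new_xid, users_tbl, prev_xid, and the changes payload are all illustrative:

xid = new_xid()                       # hypothetical transaction-id generator
write(conn, xid, users_tbl, 'alice', None, {'balance': 100})    # fresh insert
write(conn, xid, users_tbl, 'bob', prev_xid, {'balance': 50})   # guarded update
commit(conn, xid, changes=[('users', 'alice'), ('users', 'bob')])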
Example #6
def go():
    with except_printer():
        r.expr({'err': r.error('bob')}).run(c)
    with except_printer():
        r.expr([1, 2, 3, r.error('bob')]).run(c)
    with except_printer():
        (((r.expr(1) + 1) - 8) * r.error('bob')).run(c)
    with except_printer():
        r.expr([1, 2, 3]).append(r.error('bob')).run(c)
    with except_printer():
        r.expr([1, 2, 3, r.error('bob')])[1:].run(c)
    with except_printer():
        r.expr({'a': r.error('bob')})['a'].run(c)
    with except_printer():
        r.db('test').table('test').filter(
            lambda a: a.contains(r.error('bob'))).run(c)
    with except_printer():
        r.expr(1).do(lambda x: r.error('bob')).run(c)
    with except_printer():
        r.expr(1).do(lambda x: x + r.error('bob')).run(c)
    with except_printer():
        r.branch(
            r.db('test').table('test').get(0)['a'].contains(r.error('bob')),
            r.expr(1), r.expr(2)).run(c)
    with except_printer():
        r.expr([1, 2]).reduce(lambda a, b: a + r.error("bob")).run(c)
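
The except_printer context manager used throughout is not shown. A minimal sketch of what it presumably does, namely running a query that is expected to fail and printing the resulting error instead of propagating it:

import contextlib

@contextlib.contextmanager
def except_printer():
    # Swallow the expected ReQL error and print it, so the demo
    # can continue on to the next failing query.
    try:
        yield
    except Exception as err:
        print('%s: %s' % (type(err).__name__, err))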
Example #7
def go():
    with except_printer():
        r.connect(host="localhost", port="123abc")
    with except_printer():
        r.expr({'err': r.error('bob')}).run(c)
    with except_printer():
        r.expr([1, 2, 3, r.error('bob')]).run(c)
    with except_printer():
        (((r.expr(1) + 1) - 8) * r.error('bob')).run(c)
    with except_printer():
        r.expr([1, 2, 3]).append(r.error('bob')).run(c)
    with except_printer():
        r.expr([1, 2, 3, r.error('bob')])[1:].run(c)
    with except_printer():
        r.expr({'a': r.error('bob')})['a'].run(c)
    with except_printer():
        r.db('test').table('test').filter(
            lambda a: a.contains(r.error('bob'))).run(c)
    with except_printer():
        r.expr(1).do(lambda x: r.error('bob')).run(c)
    with except_printer():
        r.expr(1).do(lambda x: x + r.error('bob')).run(c)
    with except_printer():
        r.branch(
            r.db('test').table('test').get(0)['a'].contains(r.error('bob')),
            r.expr(1), r.expr(2)).run(c)
    with except_printer():
        r.expr([1, 2]).reduce(lambda a, b: a + r.error("bob")).run(c)
def append_load(db, collection, vcf_filenames, hide_loading=False, chunk_size=20, hard_durability=False, ignore_bad_info=False):
	"""Performs the loading operations for a collection that already contains samples."""
	
	# Check parameters:
	check_parameters(collection, vcf_filenames, chunk_size)

	# Prepare the parameter for db queries:
	durability = 'hard' if hard_durability else 'soft'

	### CONSISTENCY CHECKS ###
	metadata = r.table('__METADATA__').get(collection).run(db)
	table_list = r.table_list().run(db)

	assert (collection in table_list) == (metadata is not None), \
		"This collection is in a spurious state. Use vcf_admin.py to perform sanity checks."

	if metadata is None:
		print('This is a new collection, switching to direct loading method.')
		return quick_load(db, collection, vcf_filenames, hide_loading=hide_loading, chunk_size=chunk_size, hard_durability=hard_durability, ignore_bad_info=ignore_bad_info)
	else:
		# must check if the collection has finished its pending operations
		assert not metadata.get('doing_init') and not metadata.get('appending_filenames'), \
			"This collection either has still to complete another import operation or has been left in an inconsistent state, aborting. Use vcf_admin to perform consistency checks."
	#########################

	# Load parsers:
	headers, samples, parsers, filestreams = init_parsers(vcf_filenames, ignore_bad_info=ignore_bad_info)
	# I want the original filestreams, not the 'fake' ones offered by gzip
	filestreams = [f.fileobj if f.name.endswith('.gz') else f for f in filestreams]


	# check if there are collisions between new samples and the samples already loaded
	new_samples = set([sample for sublist in samples for sample in sublist])
	old_samples = set(metadata['samples'].keys())
	inter_samples = new_samples & old_samples
	if inter_samples:
		print('Some sample names are colliding, aborting.')
		print('Offending names:', ', '.join(inter_samples))
		raise ValueError

	# check if there are collisions between VCF filenames
	old_vcf_filenames = set(metadata['vcfs'].keys())
	inter_vcf_filenames = set(vcf_filenames) & old_vcf_filenames
	if inter_vcf_filenames:
		print('Some VCF filenames are colliding, aborting.')
		print('Offending names:', ', '.join(inter_vcf_filenames))
		print('Tip: you might consider using a more complete pathname to differentiate between files with the same name, e.g.:')
		print('$ ./vcf_import.py mycollection mytastycows/samples.vcf mytastiercows/samples.vcf')
		raise ValueError


	# Get filesize for every stream, used to print completion percentage and speed.
	total_filesize = float(sum([os.path.getsize(vcf) for vcf in vcf_filenames]))
	total_filesize_as_percentage = total_filesize/100


	## UPDATE METADATA ##
	collection_info = {
		'vcfs': {vcf_filenames[i] : headers[i]._asdict() for i in range(len(headers))},
		'samples': {sample: vcf_filenames[i] for i in range(len(headers)) for sample in samples[i]},
		'appending_filenames': vcf_filenames
	}

	r.table('__METADATA__').get(collection).update(r.row.merge(collection_info)).run(db)

	## UPDATE ROWS ##
	# Timers for completion percentage:
	last_iter = start_time = time.time()

	## UPDATE ROWS ##
	for multirecord in parsers:
		merged_record = merge_records(multirecord, vcf_filenames, samples)

		result = r.table(collection).get(merged_record['id']).replace(
			r.branch(r.row == None, 
				merged_record, # new record
				r.branch(r.row['REF'] == merged_record['REF'],
					r.row.merge(merged_record),
					r.error())), durability='soft').run(db)
		
		if result['errors']:
			print("\nFound mismatched REF for CHROM: {} POS: {} when confronting with data already in the database, aborting. All samples in the same collection must share the same reference genome.".format(merged_record['CHROM'], merged_record['POS']))
			raise ValueError

		if not hide_loading:
			pos = sum([f.tell() for f in filestreams])
			print('\rLoading: {0:.2f}%'.format(pos/total_filesize_as_percentage), end=' ')
			now = time.time()
			print('@ {} records/second'.format(int(1/(now-last_iter))), end=' ')
			print('- ETA: {}'.format(datetime.timedelta(seconds=int((now - start_time) * (total_filesize - pos) / pos))), end=' ')
			sys.stdout.flush()
			last_iter = now


	print('\nCompleted loading, waiting for all inserts to be flushed to disk.') 

	# flag insert job as complete once data is written to disk
	r.table(collection).sync().run(db)
	print('OK, updating metadata.')
	r.table('__METADATA__').get(collection).replace(lambda x: x.without('appending_filenames')).run(db)
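
The get/replace/branch pattern inside the update loop is the heart of this loader: insert the record if it is new, merge it if the reference allele matches, and raise a server-side error otherwise. Isolated as a self-contained sketch (the explicit error message is an addition for readability; the original calls r.error() with no message):

def upsert_record(db, collection, record):
    # Insert if missing, merge if REF agrees, error out otherwise.
    return r.table(collection).get(record['id']).replace(
        r.branch(
            r.row == None,                       # no existing document
            record,                              # -> plain insert
            r.branch(
                r.row['REF'] == record['REF'],   # same reference allele
                r.row.merge(record),             # -> merge the new samples in
                r.error('REF mismatch'))),       # -> abort with a server error
        durability='soft').run(db)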