def data_upgrades():
    """
    Populate the read_group_genomic_file association table from the old
    direct foreign key on read_group:

    COPY read_group.genomic_file_id TO read_group_genomic_file.genomic_file_id
    COPY read_group.kf_id           TO read_group_genomic_file.read_group_id

    (The original docstring listed ``read_group_id`` as the target of both
    copies; the first copy targets ``genomic_file_id`` as the code shows.)
    """
    connection = op.get_bind()
    # Lightweight table definitions covering only the columns this
    # migration reads or writes — not full model reflections.
    read_group_genomic_file = sa.Table(
        'read_group_genomic_file', sa.MetaData(),
        sa.Column('kf_id', KfId(length=11), default=kf_id_generator('RF')),
        sa.Column('genomic_file_id', KfId(length=11)),
        sa.Column('read_group_id', KfId(length=11)),
        sa.Column('created_at', sa.DateTime(), default=datetime.now),
        sa.Column('modified_at', sa.DateTime(), default=datetime.now),
        sa.Column('uuid', postgresql.UUID(), nullable=True,
                  default=uuid_generator))
    read_group = sa.Table(
        'read_group', sa.MetaData(),
        sa.Column('kf_id', KfId(length=11)),
        sa.Column('genomic_file_id', KfId(length=11)))
    results = connection.execute(
        sa.select([read_group.c.genomic_file_id,
                   read_group.c.kf_id])).fetchall()
    for gf, rg in results:
        # Skip rows missing either end of the link — the association
        # row would be meaningless without both ids.
        if not gf or not rg:
            continue
        connection.execute(read_group_genomic_file.insert().values(
            genomic_file_id=gf, read_group_id=rg))
def data_upgrades():
    """
    Populate the sequencing_experiment_genomic_file association table from
    the old direct foreign key on genomic_file:

    COPY genomic_file.kf_id TO
        sequencing_experiment_genomic_file.genomic_file_id
    COPY genomic_file.sequencing_experiment_id TO
        sequencing_experiment_genomic_file.sequencing_experiment_id

    (The original docstring claimed the source was the
    ``sequencing_experiment`` table; the code actually reads from
    ``genomic_file``.)
    """
    connection = op.get_bind()
    # Minimal table definitions — only the columns this migration touches.
    sequencing_experiment_genomic_file = sa.Table(
        'sequencing_experiment_genomic_file', sa.MetaData(),
        # NOTE(review): the 'RF' prefix looks copy-pasted from the
        # read_group migration — confirm intended prefix for this table.
        sa.Column('kf_id', KfId(length=11), default=kf_id_generator('RF')),
        sa.Column('genomic_file_id', KfId(length=11)),
        sa.Column('sequencing_experiment_id', KfId(length=11)),
        sa.Column('created_at', sa.DateTime(), default=datetime.now),
        sa.Column('modified_at', sa.DateTime(), default=datetime.now),
        sa.Column('uuid', postgresql.UUID(), nullable=True,
                  default=uuid_generator))
    genomic_file = sa.Table(
        'genomic_file', sa.MetaData(),
        sa.Column('kf_id', KfId(length=11)),
        sa.Column('sequencing_experiment_id', KfId(length=11)))
    results = connection.execute(
        sa.select([genomic_file.c.kf_id,
                   genomic_file.c.sequencing_experiment_id])).fetchall()
    for gf, seq_exp in results:
        # Skip rows missing either end of the link.
        if not gf or not seq_exp:
            continue
        connection.execute(sequencing_experiment_genomic_file.insert().values(
            genomic_file_id=gf, sequencing_experiment_id=seq_exp))
def data_upgrades():
    """
    COPY genomic_file.biospecimen_id TO biospecimen_genomic_file.biospecimen_id
    COPY genomic_file.kf_id TO biospecimen_genomic_file.genomic_file_id
    DROP genomic_file.biospecimen_id
    """
    conn = op.get_bind()
    kf_id_type = dataservice.api.common.model.KfId

    # Target association table — only the columns this migration writes.
    bs_gf_table = sa.Table(
        'biospecimen_genomic_file', sa.MetaData(),
        sa.Column('kf_id', kf_id_type(length=11),
                  default=kf_id_generator('BG')),
        sa.Column('genomic_file_id', kf_id_type(length=11)),
        sa.Column('biospecimen_id', kf_id_type(length=11)),
        sa.Column('created_at', sa.DateTime(), default=datetime.now),
        sa.Column('modified_at', sa.DateTime(), default=datetime.now),
    )
    # Source table — only the columns this migration reads.
    gf_table = sa.Table(
        'genomic_file', sa.MetaData(),
        sa.Column('kf_id', kf_id_type(length=11)),
        sa.Column('biospecimen_id', kf_id_type(length=11)),
    )

    rows = conn.execute(
        sa.select([
            gf_table.c.biospecimen_id,
            gf_table.c.kf_id,
        ])).fetchall()
    for biospecimen_id, genomic_file_id in rows:
        # Only create association rows when both ids are present.
        if biospecimen_id and genomic_file_id:
            conn.execute(bs_gf_table.insert().values(
                biospecimen_id=biospecimen_id,
                genomic_file_id=genomic_file_id))
def test_kf_id():
    """
    Test that kf_ids are generated correctly

    Generates 1000 ids and makes sure they are correct length and
    dont contain any invalid characters
    """
    for _ in range(1000):
        # Random two-letter prefix for each generated id
        prefix = ''.join(random.sample(string.ascii_uppercase, 2))
        kf_id = kf_id_generator(prefix)()

        assert len(kf_id) == 11
        assert kf_id.startswith(prefix)
        assert kf_id[2] == '_'

        # Ambiguous characters must never appear after the prefix
        suffix = kf_id[2:]
        for banned in ('I', 'L', 'O', 'U'):
            assert banned not in suffix

        assert re.search(r'^' + prefix + r'_[A-HJ-KM-NP-TV-Z0-9]{8}', kf_id)
def data_downgrades():
    """
    COPY read_group_genomic_file.genomic_file_id TO read_group.genomic_file_id

    # NB This will lose all information relating many files to one read_group
    """
    conn = op.get_bind()

    # Association table being collapsed back onto read_group.
    assoc_table = sa.Table(
        'read_group_genomic_file', sa.MetaData(),
        sa.Column('kf_id', KfId(length=11), default=kf_id_generator('RF')),
        sa.Column('genomic_file_id', KfId(length=11)),
        sa.Column('read_group_id', KfId(length=11)),
        sa.Column('created_at', sa.DateTime(), default=datetime.now),
        sa.Column('modified_at', sa.DateTime(), default=datetime.now),
        sa.Column('uuid', postgresql.UUID(), nullable=True,
                  default=uuid_generator))
    rg_table = sa.Table(
        'read_group', sa.MetaData(),
        sa.Column('kf_id', KfId(length=11)),
        sa.Column('genomic_file_id', KfId(length=11)),
    )

    rows = conn.execute(
        sa.select([
            assoc_table.c.genomic_file_id,
            assoc_table.c.read_group_id,
        ])).fetchall()
    for genomic_file_id, read_group_id in rows:
        # Only restore links where both ids are present; later rows for
        # the same read_group overwrite earlier ones (many-to-one loss).
        if genomic_file_id and read_group_id:
            conn.execute(
                rg_table.update()
                .where(rg_table.c.kf_id == read_group_id)
                .values(genomic_file_id=genomic_file_id))
def data_downgrades():
    """
    Restore the direct foreign key on genomic_file from the association
    table:

    COPY sequencing_experiment_genomic_file.sequencing_experiment_id TO
        genomic_file.sequencing_experiment_id
    (rows matched on genomic_file.kf_id)

    # NB This will lose all information relating many experiments to one
    # genomic_file — later association rows overwrite earlier ones.

    (The original docstring claimed the copy restored
    ``sequencing_experiment.genomic_file_id``; the code actually updates
    ``genomic_file.sequencing_experiment_id``.)
    """
    connection = op.get_bind()
    # Minimal table definitions — only the columns this migration touches.
    sequencing_experiment_genomic_file = sa.Table(
        'sequencing_experiment_genomic_file', sa.MetaData(),
        sa.Column('kf_id', KfId(length=11), default=kf_id_generator('RF')),
        sa.Column('genomic_file_id', KfId(length=11)),
        sa.Column('sequencing_experiment_id', KfId(length=11)),
        sa.Column('created_at', sa.DateTime(), default=datetime.now),
        sa.Column('modified_at', sa.DateTime(), default=datetime.now),
        sa.Column('uuid', postgresql.UUID(), nullable=True,
                  default=uuid_generator))
    genomic_file = sa.Table(
        'genomic_file', sa.MetaData(),
        sa.Column('kf_id', KfId(length=11)),
        sa.Column('sequencing_experiment_id', KfId(length=11)))
    results = connection.execute(
        sa.select([
            sequencing_experiment_genomic_file.c.genomic_file_id,
            sequencing_experiment_genomic_file.c.sequencing_experiment_id,
        ])).fetchall()
    for gf, seq_exp in results:
        # Skip rows missing either end of the link.
        if not gf or not seq_exp:
            continue
        connection.execute(
            genomic_file.update().where(genomic_file.c.kf_id == gf).values(
                sequencing_experiment_id=seq_exp))
def test_bad_foreign_key(self, client, entities, endpoint, method, field):
    """
    Test bad foreign key

    Foregin key is a valid kf_id but refers an entity that doesn't exist
    """
    # Build a valid payload, then poison one foreign key with an id that
    # is well-formed but points at nothing ('ZZ' prefix).
    payload = ENTITY_PARAMS['fields'][endpoint].copy()
    model_cls = ENDPOINT_ENTITY_MAP.get(endpoint)
    entity = entities.get(model_cls)[0]
    _add_foreign_keys(payload, entity)
    payload[field] = id_service.kf_id_generator('ZZ')()

    # PUT/PATCH address a specific entity; POST hits the collection.
    http_method = method.lower()
    if http_method in {'put', 'patch'}:
        url = '{}/{}'.format(endpoint, entity.kf_id)
    else:
        url = endpoint

    resp = getattr(client, http_method)(
        url,
        data=json.dumps(payload),
        headers={'Content-Type': 'application/json'})
    body = json.loads(resp.data.decode('utf-8'))

    # The API must reject the request and explain why.
    assert body['_status']['code'] == 400
    assert 'does not exist' in body['_status']['message']
def kf_id(cls):
    # Primary-key column whose default generates an id using the
    # model's own two-letter prefix (``cls.__prefix__``).
    return db.Column(KfId(),
                     primary_key=True,
                     doc="ID assigned by Kids First",
                     default=kf_id_generator(cls.__prefix__))