def get_transaction_list_details(transaction_list):
    """Return per-transaction upload summaries for ``transaction_list``.

    Files are joined to their transactions, restricted to the given
    transaction IDs and aggregated per transaction.  The result is a dict
    keyed by the stringified transaction ID; every value carries the upload
    date, the earliest and latest file mtime, the submitter, the summed file
    size, the file count, and the proposal and instrument of the
    transaction.
    """
    day_format = '%Y-%m-%d'
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    columns = [
        Files.transaction.alias('upload_id'),
        fn.Max(Transactions.updated).alias('upload_date'),
        fn.Min(Files.mtime).alias('file_date_start'),
        fn.Max(Files.mtime).alias('file_date_end'),
        fn.Min(Transactions.submitter).alias('uploaded_by_id'),
        fn.Sum(Files.size).alias('bundle_size'),
        fn.Count(Files.id).alias('file_count'),
        fn.Min(Transactions.updated).alias('upload_datetime'),
        fn.Min(Transactions.proposal).alias('proposal_id'),
        fn.Min(Transactions.instrument).alias('instrument_id'),
    ]
    query = Files().select(*columns).join(Transactions)
    query = query.where(Files.transaction << transaction_list)
    query = query.group_by(Files.transaction)

    details = {}
    for row in query.dicts():
        upload_id = str(row['upload_id'])
        details[upload_id] = {
            'upload_id': upload_id,
            'upload_date': row['upload_date'].date().strftime(day_format),
            'file_date_start':
                row['file_date_start'].date().strftime(day_format),
            'file_date_end':
                row['file_date_end'].date().strftime(day_format),
            'uploaded_by_id': int(row['uploaded_by_id']),
            'bundle_size': int(row['bundle_size']),
            'file_count': int(row['file_count']),
            # NOTE: formatted from the 'upload_date' column; the selected
            # 'upload_datetime' alias is not read here.
            'upload_datetime': row['upload_date'].strftime(timestamp_format),
            'proposal_id': row['proposal_id'],
            'instrument_id': row['instrument_id'],
        }
    return details
def _get_file_list(transaction_id):
    """Return the files of one transaction as a dict keyed by file ID.

    The files are selected ordered by name; each value is the file's
    ``to_hash()`` representation.
    """
    transaction_filter = Files().where_clause(
        {'transaction_id': transaction_id})
    ordered_files = Files.select().where(transaction_filter)
    ordered_files = ordered_files.order_by(Files.name)

    files_by_id = {}
    for file_obj in ordered_files:
        files_by_id[file_obj.id] = file_obj.to_hash()
    return files_by_id
def _get_file_details(file_list):
    """Return path, size and checksum details for the given file IDs.

    Args:
        file_list: collection of ``Files.id`` values to look up.

    Returns:
        A list with one dict per matching file, holding ``file_id``,
        ``relative_local_path`` (``subdir`` with trailing slashes stripped,
        joined to ``name`` with ``/``), ``file_size_bytes``, ``hashtype``
        and ``hashsum``.

    Raises:
        HTTPError: ``404 Not Found`` when none of the IDs matches a file.
    """
    # Fetch the rows once and reuse them: the previous ``count()`` check
    # followed by iterating the query issued two SQL statements.
    matched_files = list(Files().select().where(Files.id << file_list))
    if not matched_files:
        message = 'No files from the list {0} were located'.format(
            file_list)
        raise HTTPError('404 Not Found', message)

    return [{
        'file_id': f.id,
        'relative_local_path': '{0}/{1}'.format(
            f.subdir.rstrip('/'), f.name),
        'file_size_bytes': f.size,
        'hashtype': f.hashtype,
        'hashsum': f.hashsum
    } for f in matched_files]
def _get_earliest_latest(item_type, item_list, time_basis):
    """Return the earliest and latest timestamps for a group of items.

    Args:
        item_type: object type name, translated through
            ``QueryBase.object_type_mappings``; the translated name selects
            the ``<name>_id`` column on ``Transactions``.
        item_list: IDs matched against that column.
        time_basis: which timestamp to inspect; any accepted spelling of
            submitted / modified / created (case-insensitive).

    Returns:
        A dict with ``earliest`` and ``latest`` formatted as
        ``YYYY-MM-DD HH:MM:SS``.

    Raises:
        HTTPError: ``400 Invalid Query`` for an unknown item type or time
            basis, ``404 Not Found`` when the query yields no timestamps.
    """
    type_mappings = QueryBase.object_type_mappings
    # Set union works on Python 2 and 3 alike; ``keys() + values()`` raises
    # TypeError on Python 3, where both are views rather than lists.
    accepted_item_types = set(type_mappings) | set(type_mappings.values())
    accepted_time_basis_types = (
        'submitted', 'modified', 'created',
        # 'modify' completes the short-form trio; the original list
        # repeated 'modified' in this position.
        'submit', 'modify', 'create',
        'submit_time', 'modified_time', 'create_time',
        'submitted_date', 'modified_date', 'created_date',
    )

    item_type = type_mappings.get(item_type)
    time_basis = time_basis.lower()
    if (item_type not in accepted_item_types or
            time_basis not in accepted_time_basis_types):
        raise HTTPError('400 Invalid Query')

    # Every accepted spelling is identified by its first five characters.
    time_basis = {
        'submi': 'submitted',
        'modif': 'modified',
        'creat': 'created',
    }[time_basis[:5]]

    search_field = getattr(Transactions, '{0}_id'.format(item_type))
    if time_basis == 'submitted':
        # Submission time is the transaction's own ``updated`` column.
        query = Transactions().select(
            fn.Min(Transactions.updated).alias('earliest'),
            fn.Max(Transactions.updated).alias('latest'),
        )
    else:
        # 'modified' -> Files.mtime, 'created' -> Files.ctime.
        time_basis_field = getattr(Files, '{0}time'.format(time_basis[:1]))
        query = Files().select(
            fn.Min(time_basis_field).alias('earliest'),
            fn.Max(time_basis_field).alias('latest'),
        ).join(Transactions)

    row = query.where(search_field << item_list).get()
    if row.earliest is None or row.latest is None:
        message = ''
        raise HTTPError('404 Not Found', message)

    timestamp_format = '%Y-%m-%d %H:%M:%S'
    return {
        'earliest': row.earliest.strftime(timestamp_format),
        'latest': row.latest.strftime(timestamp_format)
    }
def _get_files_for_kv_pair(key, value):
    """Return ``to_hash()`` dicts for files whose transaction has key=value.

    The key is compared case-insensitively and the value exactly.  An empty
    list is returned when a lookup raises ``DoesNotExist``.
    """
    try:
        # Resolve the key and value rows first; ``get()`` raises
        # DoesNotExist when either one is unknown.
        key_row = Keys().select(Keys.id).where(
            fn.Lower(Keys.key) == key.lower()).get()
        value_row = Values().select(Values.id).where(
            Values.value == value).get()

        pairing_filter = TransactionKeyValue().where_clause({
            'key_id': key_row,
            'value_id': value_row
        })
        transaction_ids = []
        for pairing in TransactionKeyValue().select().where(pairing_filter):
            transaction_ids.append(pairing.transaction_id)

        matching_files = Files().select().where(
            Files.transaction << transaction_ids)
    except DoesNotExist:
        # invalid key or value
        return []

    hashed_files = []
    for file_obj in matching_files:
        hashed_files.append(file_obj.to_hash())
    return hashed_files
def _search_by_dates(object_type, object_id_list, start_date, end_date,
                     time_basis):
    """Summarise the files of the given objects inside a date range.

    Files are joined to their transactions and filtered to those whose time
    column (picked through ``QueryBase.time_basis_mappings``) lies between
    ``start_date`` and ``end_date`` inclusive and whose transaction's
    object column (picked through ``QueryBase.object_type_mappings``) is in
    ``object_id_list``.  Each matching file is passed to the
    ``SummarizeByDate`` helpers, which fill in the ``day_graph``,
    ``transaction_info`` and ``upload_stats`` sections of the returned dict;
    the file count and byte total are accumulated here.
    ``total_size_string`` is returned as an empty string.
    """
    time_field_name = QueryBase.time_basis_mappings.get(time_basis)
    object_field_name = QueryBase.object_type_mappings.get(object_type)

    # The 'submitted' time column is looked up on Transactions, every other
    # basis on Files.
    time_owner = Transactions if time_basis == 'submitted' else Files
    time_column = getattr(time_owner, time_field_name)
    object_type_column = getattr(Transactions, object_field_name)

    in_range = (
        Expression(time_column, OP.GTE, start_date)
        & Expression(time_column, OP.LTE, end_date)
        & (object_type_column << object_id_list)
    )
    selection = Files().select(
        Files.id, time_column.alias('filedate'), Files.size,
        Files.transaction)
    query = selection.join(Transactions).where(in_range)
    query = query.order_by(time_column).naive()

    # Build the result skeleton from named parts so the sections can be
    # handed straight to the summarising helpers below.
    by_date = {
        'available_dates': {},
        'file_count': {},
        'file_volume': {},
        'transactions': {},
        'file_volume_array': {},
        'transaction_count_array': {}
    }
    upload_stats = {
        'proposal': {},
        'instrument': {},
        'user': {}
    }
    summary_totals = {
        'upload_stats': upload_stats,
        'total_file_count': 0,
        'total_size_bytes': 0,
        'total_size_string': ''
    }
    transaction_info = {
        'transaction': {},
        'proposal': {},
        'instrument': {},
        'user': {}
    }
    results = {
        'day_graph': {'by_date': by_date},
        'summary_totals': summary_totals,
        'transaction_info': transaction_info
    }

    # Hash each transaction only once, however many files it owns.
    hashed_transactions = {}
    for item in query.iterator():
        txn_id = item.transaction_id
        if txn_id in hashed_transactions:
            t_info = hashed_transactions[txn_id]
        else:
            t_info = item.transaction.to_hash(0)
            hashed_transactions[txn_id] = t_info

        SummarizeByDate._summarize_by_date(by_date, item)
        SummarizeByDate._update_transaction_info_block(
            transaction_info, item, t_info)
        SummarizeByDate._summarize_upload_stats(upload_stats, t_info)
        summary_totals['total_file_count'] += 1
        summary_totals['total_size_bytes'] += item.size
    return results