forked from qiita-spots/qiita
-
Notifications
You must be signed in to change notification settings - Fork 0
/
util.py
1134 lines (948 loc) · 33.7 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
r"""
Util functions (:mod: `qiita_db.util`)
======================================
..currentmodule:: qiita_db.util
This module provides different util functions.
Methods
-------
..autosummary::
:toctree: generated/
quote_data_value
scrub_data
exists_table
exists_dynamic_table
get_db_files_base_dir
compute_checksum
get_files_from_uploads_folders
get_mountpoint
insert_filepaths
check_table_cols
check_required_columns
convert_from_id
convert_to_id
get_environmental_packages
purge_filepaths
move_filepaths_to_upload_folder
move_upload_files_to_trash
add_message
"""
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------
from __future__ import division
from future.builtins import zip
from random import choice
from string import ascii_letters, digits, punctuation
from binascii import crc32
from bcrypt import hashpw, gensalt
from functools import partial
from os.path import join, basename, isdir, relpath, exists
from os import walk, remove, listdir, makedirs, rename
from shutil import move, rmtree
from json import dumps
from datetime import datetime
from itertools import chain
from qiita_core.exceptions import IncompetentQiitaDeveloperError
from .exceptions import QiitaDBColumnError, QiitaDBError, QiitaDBLookupError
from .sql_connection import TRN
def params_dict_to_json(options):
    """Serialize a parameter dict to a canonical JSON string

    Parameters
    ----------
    options : dict
        The dict of options

    Returns
    -------
    str
        Compact JSON with keys sorted, so equal dicts always produce the
        same string (useful for comparisons/lookups).
    """
    compact = (',', ':')
    return dumps(options, separators=compact, sort_keys=True)
def scrub_data(s):
    r"""Remove characters not allowed by PostgreSQL from a data field

    Disallowed characters: ' and ;

    Parameters
    ----------
    s : str
        The string to clean up

    Returns
    -------
    str
        The scrubbed string
    """
    cleaned = s
    for bad_char in ("'", ";"):
        cleaned = cleaned.replace(bad_char, "")
    return cleaned
def convert_type(obj):
    """Coerce obj to int, float, or str, trying in that order

    Parameters
    ----------
    obj : object
        object to evaluate

    Returns
    -------
    int, float, or str
        Re-typed information from obj. datetime instances are returned as
        their string representation.

    Raises
    ------
    IncompetentQiitaDeveloperError
        If the object can't be converted to int, float, or string

    Notes
    -----
    The first conversion that does not raise ValueError wins; since str()
    accepts nearly everything, the error path is rarely reachable.
    """
    if isinstance(obj, datetime):
        return str(obj)
    for caster in (int, float, str):
        try:
            return caster(obj)
        except ValueError:
            pass
    raise IncompetentQiitaDeveloperError("Can't convert item of type %s!" %
                                         str(type(obj)))
def get_filetypes(key='type'):
    """Gets the list of possible filetypes from the filetype table

    Parameters
    ----------
    key : {'type', 'filetype_id'}, optional
        Defaults to "type". Determines the format of the returned dict.

    Returns
    -------
    dict
        If `key` is "type", dict is of the form {type: filetype_id}
        If `key` is "filetype_id", dict is of the form {filetype_id: type}

    Raises
    ------
    QiitaDBColumnError
        If `key` is neither 'type' nor 'filetype_id'
    """
    with TRN:
        # the requested key decides which column becomes the dict key
        col_orders = {'type': 'type, filetype_id',
                      'filetype_id': 'filetype_id, type'}
        if key not in col_orders:
            raise QiitaDBColumnError("Unknown key. Pass either 'type' or "
                                     "'filetype_id'.")
        TRN.add('SELECT {} FROM qiita.filetype'.format(col_orders[key]))
        return dict(TRN.execute_fetchindex())
def get_filepath_types(key='filepath_type'):
    """Gets the list of possible filepath types from the filetype table

    Parameters
    ----------
    key : {'filepath_type', 'filepath_type_id'}, optional
        Defaults to "filepath_type". Determines the format of the returned
        dict.

    Returns
    -------
    dict
        - If `key` is "filepath_type", dict is of the form
          {filepath_type: filepath_type_id}
        - If `key` is "filepath_type_id", dict is of the form
          {filepath_type_id: filepath_type}

    Raises
    ------
    QiitaDBColumnError
        If `key` is neither 'filepath_type' nor 'filepath_type_id'
    """
    with TRN:
        # the requested key decides which column becomes the dict key
        col_orders = {
            'filepath_type': 'filepath_type, filepath_type_id',
            'filepath_type_id': 'filepath_type_id, filepath_type'}
        if key not in col_orders:
            raise QiitaDBColumnError("Unknown key. Pass either 'filepath_type'"
                                     " or 'filepath_type_id'.")
        TRN.add('SELECT {} FROM qiita.filepath_type'.format(col_orders[key]))
        return dict(TRN.execute_fetchindex())
def get_data_types(key='data_type'):
    """Gets the list of possible data types from the data_type table

    Parameters
    ----------
    key : {'data_type', 'data_type_id'}, optional
        Defaults to "data_type". Determines the format of the returned dict.

    Returns
    -------
    dict
        - If `key` is "data_type", dict is of the form
          {data_type: data_type_id}
        - If `key` is "data_type_id", dict is of the form
          {data_type_id: data_type}

    Raises
    ------
    QiitaDBColumnError
        If `key` is neither 'data_type' nor 'data_type_id'
    """
    with TRN:
        # the requested key decides which column becomes the dict key
        col_orders = {'data_type': 'data_type, data_type_id',
                      'data_type_id': 'data_type_id, data_type'}
        if key not in col_orders:
            raise QiitaDBColumnError("Unknown key. Pass either 'data_type_id' "
                                     "or 'data_type'.")
        TRN.add('SELECT {} FROM qiita.data_type'.format(col_orders[key]))
        return dict(TRN.execute_fetchindex())
def create_rand_string(length, punct=True):
    """Returns a string of random ascii characters

    Parameters
    ----------
    length: int
        Length of string to return
    punct: bool, optional
        Include punctuation as well as letters and numbers. Default True.

    Returns
    -------
    str
        The generated random string

    Notes
    -----
    NOTE(review): this uses ``random.choice``, which is not
    cryptographically secure — confirm no caller relies on it for secrets.
    """
    pool = ascii_letters + digits
    if punct:
        pool += punctuation
    return ''.join(choice(pool) for _ in range(length))
def hash_password(password, hashedpw=None):
    """Hashes a plaintext password with bcrypt

    Parameters
    ----------
    password: str
        Plaintext password
    hashedpw: str, optional
        Previously hashed password for bcrypt to pull salt from. If not
        given, salt generated before hash

    Returns
    -------
    str
        Hashed password

    Notes
    -----
    Relies on the bcrypt library, which embeds the salt inside the hashed
    password — no need to store the salt separately.
    """
    # the encode/decode round-trips keep bcrypt happy under Python 3,
    # which wants bytes in and may hand bytes back
    salt = gensalt() if hashedpw is None else hashedpw.encode('utf-8')
    hashed = hashpw(password.encode('utf-8'), salt)
    if isinstance(hashed, bytes):
        hashed = hashed.decode("utf-8")
    return hashed
def check_required_columns(keys, table):
    """Makes sure all required columns in database table are in keys

    Parameters
    ----------
    keys: iterable
        Holds the keys in the dictionary
    table: str
        name of the table to check required columns

    Raises
    ------
    QiitaDBColumnError
        If any required column of `table` is missing from `keys`
    RuntimeError
        Unable to get columns from database
    """
    with TRN:
        sql = """SELECT is_nullable, column_name, column_default
                 FROM information_schema.columns WHERE table_name = %s"""
        TRN.add(sql, [table])
        cols = TRN.execute_fetchindex()
        # A user with restricted permissions can run the query without
        # error yet get no column names back, so empty means failure
        if not cols:
            raise RuntimeError("Unable to fetch column names for table %s"
                               % table)
        # required = NOT NULL columns without a default value
        required = {name for nullable, name, default in cols
                    if nullable == 'NO' and default is None}
        missing = required.difference(keys)
        if missing:
            raise QiitaDBColumnError("Required keys missing: %s" % missing)
def check_table_cols(keys, table):
    """Makes sure all keys correspond to column headers in a table

    Parameters
    ----------
    keys: iterable
        Holds the keys in the dictionary
    table: str
        name of the table to check column names

    Raises
    ------
    QiitaDBColumnError
        If a key is found that is not in table columns
    RuntimeError
        Unable to get columns from database
    """
    with TRN:
        sql = """SELECT column_name FROM information_schema.columns
                 WHERE table_name = %s"""
        TRN.add(sql, [table])
        cols = TRN.execute_fetchflatten()
        # A user with restricted permissions can run the query without
        # error yet get no column names back, so empty means failure
        if not cols:
            raise RuntimeError("Unable to fetch column names for table %s"
                               % table)
        unknown = set(keys).difference(cols)
        if unknown:
            raise QiitaDBColumnError("Non-database keys found: %s" % unknown)
def get_table_cols(table):
    """Returns the column headers of table

    Parameters
    ----------
    table : str
        The table name

    Returns
    -------
    list of str
        The column headers of `table`. Empty list if the table does not
        exist in the `qiita` schema.
    """
    with TRN:
        # restricted to the qiita schema so same-named tables elsewhere
        # don't leak into the result
        sql = """SELECT column_name FROM information_schema.columns
                 WHERE table_name=%s AND table_schema='qiita'"""
        TRN.add(sql, [table])
        return TRN.execute_fetchflatten()
def get_table_cols_w_type(table):
    """Returns the column headers and its type

    Parameters
    ----------
    table : str
        The table name

    Returns
    -------
    list of tuples of (str, str)
        The column headers and data type of `table`
    """
    with TRN:
        # Filter on table_schema, consistent with get_table_cols; without
        # it a same-named table in another schema would also be matched
        sql = """SELECT column_name, data_type FROM information_schema.columns
                 WHERE table_name=%s AND table_schema='qiita'"""
        TRN.add(sql, [table])
        return TRN.execute_fetchindex()
def exists_table(table):
    r"""Checks if `table` exists on the database

    Parameters
    ----------
    table : str
        The table name to check if exists

    Returns
    -------
    bool
        Whether `table` exists on the database or not

    Notes
    -----
    The lookup is not restricted to the `qiita` schema, so a table with
    the same name in any visible schema counts as existing.
    """
    with TRN:
        sql = """SELECT exists(
                    SELECT * FROM information_schema.tables
                    WHERE table_name=%s)"""
        TRN.add(sql, [table])
        return TRN.execute_fetchlast()
def exists_dynamic_table(table, prefix, suffix):
    r"""Checks if the dynamic `table` exists on the database, and its name
    starts with prefix and ends with suffix

    Parameters
    ----------
    table : str
        The table name to check if exists
    prefix : str
        The table name prefix
    suffix : str
        The table name suffix

    Returns
    -------
    bool
        Whether `table` exists on the database or not and its name
        starts with prefix and ends with suffix
    """
    # cheap name checks first; only hit the database when they pass
    if not table.startswith(prefix):
        return False
    if not table.endswith(suffix):
        return False
    return exists_table(table)
def get_db_files_base_dir():
    r"""Returns the path to the base directory of all db files

    The value is read from the `base_data_dir` column of the settings table.

    Returns
    -------
    str
        The path to the base directory of all db files
    """
    with TRN:
        TRN.add("SELECT base_data_dir FROM settings")
        return TRN.execute_fetchlast()
def get_work_base_dir():
    r"""Returns the path to the base working directory

    The value is read from the `base_work_dir` column of the settings table.
    (The original docstring said "db files", but this queries the working
    directory, not the data directory.)

    Returns
    -------
    str
        The path to the base working directory
    """
    with TRN:
        TRN.add("SELECT base_work_dir FROM settings")
        return TRN.execute_fetchlast()
def compute_checksum(path):
    r"""Returns the CRC32 checksum of the file or directory pointed by path

    Parameters
    ----------
    path : str
        The path to compute the checksum. If it is a directory, the
        checksum covers every file found under it.

    Returns
    -------
    int
        The file checksum, masked to an unsigned 32-bit value
    """
    filepaths = []
    if isdir(path):
        for name, dirs, files in walk(path):
            join_f = partial(join, name)
            filepaths.extend(list(map(join_f, files)))
    else:
        filepaths.append(path)

    crc = 0
    for fp in filepaths:
        # BUG FIX: was open(fp, "Ub"). Mode "U" (universal newlines) is
        # text-mode only and is invalid combined with "b" on Python 3, and
        # newline translation would corrupt the checksum of binary data.
        # Read fixed-size chunks instead of lines so a huge file with no
        # newlines can't be buffered whole; feeding the previous crc back
        # into crc32() makes the chunked result identical to one pass.
        with open(fp, "rb") as f:
            for chunk in iter(partial(f.read, 65536), b""):
                crc = crc32(chunk, crc)
    # We need the & 0xffffffff in order to get the same numeric value across
    # all python versions and platforms
    return crc & 0xffffffff
def get_files_from_uploads_folders(study_id):
    """Retrieve files in upload folders

    Parameters
    ----------
    study_id : str
        The study id of which to retrieve all upload folders

    Returns
    -------
    list
        List of (folder_id, filename) tuples for the files uploaded for
        that study
    """
    found = []
    for folder_id, folder in get_mountpoint("uploads", retrieve_all=True):
        study_dir = join(folder, study_id)
        if not exists(study_dir):
            continue
        for fname in listdir(study_dir):
            # skip hidden files and sub-directories (e.g. the trash folder)
            if fname.startswith('.') or isdir(join(study_dir, fname)):
                continue
            found.append((folder_id, fname))
    return found
def move_upload_files_to_trash(study_id, files_to_move):
    """Move files to a trash folder within the study_id upload folder

    Parameters
    ----------
    study_id : int
        The study id
    files_to_move : list
        List of tuples (folder_id, filename)

    Raises
    ------
    QiitaDBError
        If folder_id or the study folder don't exist and if the filename to
        erase matches the trash_folder, internal variable
    """
    # name of the per-study trash subfolder; deleting it is forbidden below
    trash_folder = 'trash'
    # map of upload folder id -> mountpoint path (all mounts, active first)
    folders = {k: v for k, v in get_mountpoint("uploads", retrieve_all=True)}
    for fid, filename in files_to_move:
        if filename == trash_folder:
            raise QiitaDBError("You can not erase the trash folder: %s"
                               % trash_folder)
        if fid not in folders:
            raise QiitaDBError("The filepath id: %d doesn't exist in the "
                               "database" % fid)
        foldername = join(folders[fid], str(study_id))
        if not exists(foldername):
            raise QiitaDBError("The upload folder for study id: %d doesn't "
                               "exist" % study_id)
        # lazily create the trash folder the first time it is needed
        trashpath = join(foldername, trash_folder)
        if not exists(trashpath):
            makedirs(trashpath)
        fullpath = join(foldername, filename)
        new_fullpath = join(foldername, trash_folder, filename)
        if not exists(fullpath):
            raise QiitaDBError("The filepath %s doesn't exist in the system" %
                               fullpath)
        # NOTE(review): files trashed before a failing item are NOT rolled
        # back — a mid-list error leaves the move partially applied.
        # Confirm callers tolerate this.
        rename(fullpath, new_fullpath)
def get_mountpoint(mount_type, retrieve_all=False):
    r"""Returns data directory information for the given mount type

    Parameters
    ----------
    mount_type : str
        The data mount type
    retrieve_all : bool
        Retrieve all the available mount points (active ones first) or just
        the active one

    Returns
    -------
    list
        List of tuple, where: [(id_mountpoint, filepath_of_mountpoint)]
    """
    with TRN:
        if retrieve_all:
            sql = """SELECT data_directory_id, mountpoint, subdirectory
                     FROM qiita.data_directory
                     WHERE data_type=%s ORDER BY active DESC"""
        else:
            sql = """SELECT data_directory_id, mountpoint, subdirectory
                     FROM qiita.data_directory
                     WHERE data_type=%s AND active=true"""
        TRN.add(sql, [mount_type])
        rows = TRN.execute_fetchindex()
        basedir = get_db_files_base_dir()
        mounts = []
        for dd_id, mountpoint, subdir in rows:
            mounts.append((dd_id, join(basedir, mountpoint, subdir)))
        return mounts
def get_mountpoint_path_by_id(mount_id):
    r"""Returns the mountpoint path for the mountpoint with id = mount_id

    Parameters
    ----------
    mount_id : int
        The mountpoint id

    Returns
    -------
    str
        The mountpoint path (base dir + mountpoint + subdirectory)
    """
    with TRN:
        sql = """SELECT mountpoint, subdirectory FROM qiita.data_directory
                 WHERE data_directory_id=%s"""
        TRN.add(sql, [mount_id])
        # data_directory_id is the primary key, so exactly one row comes back
        mountpoint, subdirectory = TRN.execute_fetchindex()[0]
        return join(get_db_files_base_dir(), mountpoint, subdirectory)
def insert_filepaths(filepaths, obj_id, table, filepath_table,
                     move_files=True):
    r"""Inserts `filepaths` in the database.

    Since the files live outside the database, the directory in which the
    files live is controlled by the database, so it moves the filepaths from
    its original location to the controlled directory.

    Parameters
    ----------
    filepaths : iterable of tuples (str, int)
        The list of paths to the raw files and its filepath type identifier
    obj_id : int
        Id of the object calling the functions. Disregarded if move_files
        is False
    table : str
        Table that holds the file data.
    filepath_table : str
        Table that holds the filepath information
    move_files : bool, optional
        Whether or not to copy from the given filepaths to the db filepaths
        default: True

    Returns
    -------
    list of int
        List of the filepath_id in the database for each added filepath
    """
    with TRN:
        new_filepaths = filepaths
        dd_id, mp = get_mountpoint(table)[0]
        base_fp = join(get_db_files_base_dir(), mp)
        if move_files:
            # Generate the new filepaths. Format: DataId_OriginalName
            # Keeping the original name is useful for checking if the RawData
            # already exists on the DB
            db_path = partial(join, base_fp)
            new_filepaths = [
                (db_path("%s_%s" % (obj_id, basename(path))), id_)
                for path, id_ in filepaths]
            # Move the original files to the controlled DB directory
            for old_fp, new_fp in zip(filepaths, new_filepaths):
                move(old_fp[0], new_fp[0])
                # In case the transaction executes a rollback, we need to
                # make sure the files have not been moved
                TRN.add_post_rollback_func(move, new_fp[0], old_fp[0])

        # BUG FIX: the original used isinstance(x, (int, long)); `long`
        # does not exist on Python 3 and raised NameError there
        try:
            integer_types = (int, long)  # noqa: F821 (Python 2 only)
        except NameError:
            integer_types = (int,)

        def str_to_id(x):
            # accept either a numeric filepath type id or its string name
            return (x if isinstance(x, integer_types)
                    else convert_to_id(x, "filepath_type"))
        paths_w_checksum = [(relpath(path, base_fp), str_to_id(id_),
                             compute_checksum(path))
                            for path, id_ in new_filepaths]
        # Create the list of SQL values to add
        values = [[path, pid, checksum, 1, dd_id]
                  for path, pid, checksum in paths_w_checksum]
        # Insert all the filepaths at once and get the filepath_id back
        sql = """INSERT INTO qiita.{0}
                    (filepath, filepath_type_id, checksum,
                     checksum_algorithm_id, data_directory_id)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING filepath_id""".format(filepath_table)
        idx = TRN.index
        TRN.add(sql, values, many=True)
        # Since we added the query with many=True, we've added len(values)
        # queries to the transaction, so the ids are in the last idx queries
        return list(chain.from_iterable(
            chain.from_iterable(TRN.execute()[idx:])))
def purge_filepaths():
    r"""Goes over the filepath table and remove all the filepaths that are not
    used in any place, together with the files they point to
    """
    with TRN:
        # Get all the (table, column) pairs that reference to the filepath
        # table. Adapted from http://stackoverflow.com/q/5347050/3746629
        sql = """SELECT R.TABLE_NAME, R.column_name
            FROM INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE u
            INNER JOIN INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS FK
                ON U.CONSTRAINT_CATALOG = FK.UNIQUE_CONSTRAINT_CATALOG
                AND U.CONSTRAINT_SCHEMA = FK.UNIQUE_CONSTRAINT_SCHEMA
                AND U.CONSTRAINT_NAME = FK.UNIQUE_CONSTRAINT_NAME
            INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE R
                ON R.CONSTRAINT_CATALOG = FK.CONSTRAINT_CATALOG
                AND R.CONSTRAINT_SCHEMA = FK.CONSTRAINT_SCHEMA
                AND R.CONSTRAINT_NAME = FK.CONSTRAINT_NAME
            WHERE U.COLUMN_NAME = 'filepath_id'
                AND U.TABLE_SCHEMA = 'qiita'
                AND U.TABLE_NAME = 'filepath'"""
        TRN.add(sql)
        union_str = " UNION ".join(
            ["SELECT %s FROM qiita.%s WHERE %s IS NOT NULL" % (col, table, col)
             for table, col in TRN.execute_fetchindex()])
        # Get all the filepaths from the filepath table that are not
        # referenced from any place in the database
        sql = """SELECT filepath_id, filepath, filepath_type, data_directory_id
            FROM qiita.filepath FP JOIN qiita.filepath_type FPT
                ON FP.filepath_type_id = FPT.filepath_type_id
            WHERE filepath_id NOT IN (%s)""" % union_str
        TRN.add(sql)
        # We can now go over and remove all the filepaths
        sql = "DELETE FROM qiita.filepath WHERE filepath_id=%s"
        for fp_id, fp, fp_type, dd_id in TRN.execute_fetchindex():
            TRN.add(sql, [fp_id])
            # Remove the data
            fp = join(get_mountpoint_path_by_id(dd_id), fp)
            if exists(fp):
                # BUG FIX: the original used `fp_type is 'directory'`;
                # identity comparison against a string literal is unreliable
                # (values coming from the DB are not interned), so the
                # directory branch could silently never trigger
                if fp_type == 'directory':
                    func = rmtree
                else:
                    func = remove
                # only delete from disk once the transaction commits
                TRN.add_post_commit_func(func, fp)
        TRN.execute()
def move_filepaths_to_upload_folder(study_id, filepaths):
    r"""Goes over the filepaths list and moves all the filepaths that are not
    used in any place to the upload folder of the study

    Parameters
    ----------
    study_id : int
        The study id to where the files should be returned to
    filepaths : list
        List of filepaths to move to the upload folder. Each element is
        unpacked as a 3-tuple (filepath_id, full_path, _); the third value
        is ignored.
    """
    with TRN:
        uploads_fp = join(get_mountpoint("uploads")[0][1], str(study_id))
        path_builder = partial(join, uploads_fp)
        # We can now go over and remove all the filepaths
        sql = """DELETE FROM qiita.filepath WHERE filepath_id=%s"""
        for fp_id, fp, _ in filepaths:
            TRN.add(sql, [fp_id])
            # removing id from the raw data filename
            # assumes basename(fp) looks like "<id>_<original_name>";
            # a name without '_' would raise IndexError — TODO confirm
            filename = basename(fp).split('_', 1)[1]
            destination = path_builder(filename)
            # if the transaction rolls back, move the file back to where
            # it came from
            TRN.add_post_rollback_func(move, destination, fp)
            move(fp, destination)
        TRN.execute()
def get_filepath_id(table, fp):
    """Return the filepath_id of fp

    Parameters
    ----------
    table : str
        The table type so we can search on this one
    fp : str
        The full filepath

    Returns
    -------
    int
        The filepath id for the given filepath

    Raises
    ------
    QiitaDBError
        If fp is not stored in the DB.
    """
    with TRN:
        _, mp = get_mountpoint(table)[0]
        base_fp = join(get_db_files_base_dir(), mp)
        # the DB stores filepaths relative to the mountpoint
        sql = "SELECT filepath_id FROM qiita.filepath WHERE filepath=%s"
        TRN.add(sql, [relpath(fp, base_fp)])
        rows = TRN.execute_fetchindex()
        if not rows:
            raise QiitaDBError("Filepath not stored in the database")
        # a match is a single row with a single value
        return rows[0][0]
def filepath_id_to_rel_path(filepath_id):
    """Gets the full path, relative to the base directory

    Parameters
    ----------
    filepath_id : int
        The filepath id to look up

    Returns
    -------
    str
        The relative path for the given filepath id, built as
        mountpoint/subdirectory/filepath
    """
    with TRN:
        sql = """SELECT mountpoint, subdirectory, filepath
                 FROM qiita.filepath
                 JOIN qiita.data_directory USING (data_directory_id)
                 WHERE filepath_id = %s"""
        TRN.add(sql, [filepath_id])
        # It should be only one row
        return join(*TRN.execute_fetchindex()[0])
def filepath_ids_to_rel_paths(filepath_ids):
    """Gets the full paths, relative to the base directory

    Parameters
    ----------
    filepath_ids : list of int
        The filepath ids to look up

    Returns
    -------
    dict where keys are ints and values are str
        {filepath_id: relative_path}. Empty dict for an empty input.
    """
    # avoid an empty IN () clause, which is invalid SQL
    if not filepath_ids:
        return {}
    with TRN:
        sql = """SELECT filepath_id, mountpoint, subdirectory, filepath
                 FROM qiita.filepath
                 JOIN qiita.data_directory USING (data_directory_id)
                 WHERE filepath_id IN %s"""
        TRN.add(sql, [tuple(filepath_ids)])
        rel_paths = {}
        for fp_id, mountpoint, subdir, fp in TRN.execute_fetchindex():
            rel_paths[fp_id] = join(mountpoint, subdir, fp)
        return rel_paths
def convert_to_id(value, table, text_col=None):
    """Converts a string value to its corresponding table identifier

    Parameters
    ----------
    value : str
        The string value to convert
    table : str
        The table that has the conversion
    text_col : str, optional
        Column holding the string value. Defaults to same as table name.

    Returns
    -------
    int
        The id corresponding to the string

    Raises
    ------
    QiitaDBLookupError
        The passed string has no associated id
    """
    if text_col is None:
        text_col = table
    with TRN:
        sql = "SELECT {0}_id FROM qiita.{0} WHERE {1} = %s".format(
            table, text_col)
        TRN.add(sql, [value])
        rows = TRN.execute_fetchindex()
        if not rows:
            raise QiitaDBLookupError("%s not valid for table %s"
                                     % (value, table))
        # a match is a single row with a single value
        return rows[0][0]
def convert_from_id(value, table):
    """Converts an id value to its corresponding string value

    Parameters
    ----------
    value : int
        The id value to convert
    table : str
        The table that has the conversion

    Returns
    -------
    str
        The string corresponding to the id

    Raises
    ------
    QiitaDBLookupError
        The passed id has no associated string
    """
    with TRN:
        sql = "SELECT {0} FROM qiita.{0} WHERE {0}_id = %s".format(table)
        TRN.add(sql, [value])
        rows = TRN.execute_fetchindex()
        if not rows:
            raise QiitaDBLookupError("%s not valid for table %s"
                                     % (value, table))
        # a match is a single row with a single value
        return rows[0][0]
def get_count(table):
    """Counts the number of rows in a table

    Parameters
    ----------
    table : str
        The name of the table of which to count the rows

    Returns
    -------
    int
        The row count of `table`

    Notes
    -----
    The table name is interpolated directly into the SQL string, so it
    must come from trusted internal code, never from user input.
    """
    with TRN:
        sql = "SELECT count(1) FROM %s" % table
        TRN.add(sql)
        return TRN.execute_fetchlast()
def check_count(table, exp_count):
    """Checks that the number of rows in a table equals the expected count

    Parameters
    ----------
    table : str
        The name of the table of which to count the rows
    exp_count : int
        The expected number of rows in the table

    Returns
    -------
    bool
        True when `table` holds exactly `exp_count` rows
    """
    return get_count(table) == exp_count
def get_preprocessed_params_tables():
    """Returns a list of preprocessed parameter tables

    Returns
    -------
    list of str
        Names of the qiita-schema tables whose name starts with
        'preprocessed_', excluding the fixed data/filepath link tables,
        sorted alphabetically
    """
    with TRN:
        sql = """SELECT table_name FROM information_schema.tables
                 WHERE table_schema = 'qiita'
                    AND SUBSTR(table_name, 1, 13) = 'preprocessed_'
                    AND table_name NOT IN ('preprocessed_data',
                                           'preprocessed_filepath',
                                           'preprocessed_processed_data')
                 ORDER BY table_name"""
        TRN.add(sql)
        return TRN.execute_fetchflatten()
def get_processed_params_tables():
    """Returns a list of all tables starting with "processed_params_"

    Returns
    -------
    list of str
        Names of the matching qiita-schema tables, sorted alphabetically
    """
    with TRN:
        sql = """SELECT table_name FROM information_schema.tables
                 WHERE table_schema = 'qiita'
                    AND SUBSTR(table_name, 1, 17) = 'processed_params_'
                 ORDER BY table_name"""
        TRN.add(sql)
        return TRN.execute_fetchflatten()
def get_environmental_packages():
    """Get the list of available environmental packages

    Returns
    -------
    list of (str, str)
        The available environmental packages. The first string is the
        environmental package name and the second string is the table where
        the metadata for the environmental package is stored
    """
    with TRN:
        TRN.add("SELECT * FROM qiita.environmental_package")
        return TRN.execute_fetchindex()
def get_timeseries_types():
    """Get the list of available timeseries types

    Returns
    -------
    list of (int, str, str)
        The available timeseries types. Each timeseries type is defined by
        the tuple (timeseries_id, timeseries_type, intervention_type),
        ordered by id
    """
    with TRN:
        sql = "SELECT * FROM qiita.timeseries_type ORDER BY timeseries_type_id"
        TRN.add(sql)
        return TRN.execute_fetchindex()