class BianaParser(object):
    """
    General parser base class for BIANA.

    Declares the command-line options shared by every database parser,
    parses them from sys.argv and drives the life cycle of a parsing run
    (connect, register the external database, parse, close or roll back).

    This block calls self.parse_database(), self.print_help() and
    self.get_log_string() but does not define them; they have to be supplied
    elsewhere (for instance by the concrete parser subclass).
    """

    def __init__(self,
                 default_db_description=None,
                 default_script_name="bianaParser.py",
                 default_script_description="This file implements a program that fills up tables in database biana with information from distinct databases",
                 additional_compulsory_arguments=[],
                 additional_optional_arguments=[]):
        """
        Starts the bianaParser object: builds the option tables, parses the
        command line and caches the parsed values as attributes.

        default_db_description: default value of --database-description
        default_script_name: stored in self.script_name
        default_script_description: stored in self.script_description
        additional_compulsory_arguments: extra (name, default, help) tuples
            appended to the compulsory options
        additional_optional_arguments: extra (name, default, help) tuples
            appended to the optional options

        The two list defaults are only read (copied with extend), never
        mutated, so sharing them between calls is harmless.

        Each option is a (name, default, help) tuple; a trailing "=" in the
        name means that the option takes a value (getopt long-option syntax).
        """

        print("Parser object started")

        self.compulsory_arguments = [
            ("input-identifier=", None, "path or file name of input file(s) containing database data. Path names must end with \"/\"."),
            ("biana-dbname=", None, "name of database biana to be used"),
            ("biana-dbhost=", None, "name of host where database biana to be used is placed"),
            ("database-name=", None, "internal identifier name to this database (it must be unique in the database)"),
            ("database-version=", None, "version of the database to be inserted"),
        ]
        self.compulsory_arguments.extend(additional_compulsory_arguments)

        # NOTE(review): the defaults of biana-dbuser / biana-dbpass were
        # unreadable in the source this was restored from; None is assumed
        # (both are described as "not required") -- confirm.
        self.optional_arguments = [
            ("biana-dbuser=", None, "username accessing the database (not required in most systems)"),
            ("biana-dbpass=", None, "password of username accessing the database (not required in most systems"),
            ("help", None, "prints this message and exits"),
            ("verbose", 0, "prints process info to stdout"),
            ("log-file=", None, "Prints a log file of the parsing result (number of inserted proteins, references...)"),
            ("time-control", None, "prints to stderr a control of the timing of the parser"),
            ("database-description=", default_db_description, "Description of the database to be inserted."),
            ("optimize-for-parsing", None, "Optimizes database for parsing"),
            ("promiscuous", False, "sets the database to be parsed as promiscuous (whose entities can be included in multi user entities)"),
        ]
        self.optional_arguments.extend(additional_optional_arguments)

        # Assigned before parseArguments() because that call may end up in
        # print_help() (defined outside this block).
        self.script_name = default_script_name
        self.script_description = default_script_description

        # Parse general arguments
        self.arguments_dic = self.parseArguments()

        self.input_file = self.arguments_dic["input-identifier"]
        self.biana_dbname = self.arguments_dic["biana-dbname"]
        self.biana_dbhost = self.arguments_dic["biana-dbhost"]
        self.sourcedb_name = self.arguments_dic["database-name"]
        self.sourcedb_version = self.arguments_dic["database-version"]
        self.biana_dbuser = self.arguments_dic["biana-dbuser"]
        self.biana_dbpass = self.arguments_dic["biana-dbpass"]
        self.help = self.arguments_dic["help"]
        self.verbose = self.arguments_dic["verbose"]
        self.time_control = self.arguments_dic["time-control"]
        self.log_file = self.arguments_dic["log-file"]
        self.optimize_for_parsing = self.arguments_dic["optimize-for-parsing"]

        # Flag deciding whether the database gives information that is going
        # to be added to more than one user entity
        self.is_promiscuous = self.arguments_dic["promiscuous"]

        self.database = None

        # Default externalEntityAttribute; only available when a parser
        # declared a "default-attribute" option among its additional ones.
        if "default-attribute" in self.arguments_dic:
            self.default_eE_attribute = self.arguments_dic["default-attribute"]
        else:
            self.default_eE_attribute = ""

        # Lower-cased attribute identifier -> maximum accepted value length.
        # Attributes whose SQL type is not recognised get no entry.
        self.attribute_identifier_to_max_value_length = {}
        attribute_types = (EXTERNAL_ENTITY_IDENTIFIER_ATTRIBUTES
                           + EXTERNAL_ENTITY_VERSIONABLE_IDENTIFIER_ATTRIBUTE_TYPES
                           + EXTERNAL_ENTITY_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES
                           + EXTERNAL_ENTITY_DESCRIPTIVE_ATTRIBUTE_TYPES
                           + EXTERNAL_ENTITY_NUMERIC_ATTRIBUTE_TYPES)
        for key, value in attribute_types:
            max_length = self._max_value_length(value)
            if max_length is not None:
                self.attribute_identifier_to_max_value_length[key.lower()] = max_length

    @staticmethod
    def _max_value_length(sql_type):
        """
        Returns the maximum number of characters accepted for a value of the
        given SQL column type string, or None if the type is not recognised.
        """

        sql_type = sql_type.lower()

        # Types carrying an explicit character count: "varchar(255)", ...
        for prefix in ("varchar(", "char(", "text("):
            if sql_type.startswith(prefix):
                return int(sql_type[len(prefix):].rstrip(")"))

        if sql_type.startswith("integer("):
            if sql_type.endswith(" unsigned"):
                sql_type = sql_type[:-len(" unsigned")]
            # NOTE(review): N in "integer(N)" is used as a number of bytes
            # (digits of 256**N) -- confirm that this is what the type
            # declarations mean.
            return len(str((2**8)**int(sql_type[len("integer("):].rstrip(")"))))

        if sql_type.startswith("integer"):
            return len(str((2**8)**4))

        if sql_type.startswith("smallint"):
            return len(str((2**8)**2))

        if sql_type.startswith("text"):
            return 999999

        return None

    def verify_attribute_length(self, attribute_identifier, attribute_value):
        """
        Writes a warning to stderr when attribute_value is empty or longer
        than the maximum length registered for attribute_identifier.

        The identifier is looked up case-insensitively (keys are stored
        lower-cased); identifiers without a registered maximum only get the
        empty-value check. Nothing is returned and nothing is raised for a
        failed check.
        """

        # Values that are not strings (e.g. numbers) are measured on their
        # string representation.
        if not hasattr(attribute_value, "lower"):
            attribute_value = str(attribute_value)
        attribute_value = attribute_value.lower()
        length = len(attribute_value)

        max_length = self.attribute_identifier_to_max_value_length.get(attribute_identifier.lower())

        if max_length is not None and length > max_length:
            sys.stderr.write("\n%s longer than expected: %s\n" % (attribute_identifier, attribute_value))
        elif length == 0:
            sys.stderr.write("\n%s has 0 length: %s\n" % (attribute_identifier, attribute_value))

    def start(self):
        """
        Runs the parser: connects to the biana database, registers the
        external database being parsed, calls self.parse_database() and
        closes the connection.

        Exits with status 1 when the database name or version was given
        without a value, or when anything fails while parsing (in which case
        the database access object is asked to roll back).
        """

        print("Parser started")

        # parseArguments() stores the integer 1 for options given without a
        # value, so an int here means the name or version is missing.
        if isinstance(self.sourcedb_name, int) or isinstance(self.sourcedb_version, int):
            sys.stderr.write("You must insert correctly the database name and database version\n")
            sys.exit(1)

        self.database_description = self.arguments_dic["database-description"]

        # Log dictionary where all log information will be stored
        self.log = {}

        if self.log_file:
            self.log_file_fd = open(self.log_file, 'w')

        self.biana_access = BianaDBaccess(dbname=self.biana_dbname,
                                          dbhost=self.biana_dbhost,
                                          dbuser=self.biana_dbuser,
                                          use_buffer=True,
                                          dbpassword=self.biana_dbpass,
                                          lock_tables=True,
                                          check_integrity=True)  # check data consistency

        # Time related
        self.initial_time = time.time()

        # Introduce the information of the parsed database into biana.
        # NOTE(review): split(os.sep)[-1] is "" when input_file ends with the
        # path separator.
        self.database = ExternalDatabase(databaseName=self.sourcedb_name,
                                         databaseVersion=self.sourcedb_version,
                                         databaseFile=self.input_file.split(os.sep)[-1],
                                         databaseDescription=self.database_description,
                                         defaultExternalEntityAttribute=self.default_eE_attribute,
                                         isPromiscuous=self.is_promiscuous)

        self.biana_access.insert_new_external_database(externalDatabase=self.database)

        # Opening the input file is a responsibility of parse_database()
        try:
            if self.optimize_for_parsing:
                self.biana_access.optimize_database_for(mode="parsing")

            self.parse_database()

            # Set the parsing time
            self.database.set_parsing_time(int(time.time() - self.initial_time))

            # Updates the information that this external database has inserted
            self.biana_access.update_external_database_external_entity_attributes(self.database)

            self.close()

        except BaseException:
            # Deliberately as broad as the former bare "except:": an
            # interrupt or a sys.exit() raised while parsing must also
            # trigger the rollback.
            traceback.print_exc()
            sys.stderr.write("ERROR WHILE PARSING. ALL MODIFICATIONS ARE GOING TO BE DELETED\n")
            self.biana_access._rollback()
            sys.exit(1)

    def close(self):
        """
        Last step of a run: closes the database connection and emits the
        optional timing and log reports.
        """

        # As bianaDBaccess uses an internal buffer, it is necessary to close
        # the connection to be sure that all inserts are correctly done, as
        # well as to unlock tables.
        self.biana_access.close()

        if self.time_control:
            sys.stderr.write("Total time: %s seconds\n" % (time.time() - self.initial_time))

        if self.log_file:
            self.log_file_fd.write(self.get_log_string())
            self.log_file_fd.close()

        if self.verbose:
            sys.stderr.write("\n Total time: %s \n" % (time.time() - self.initial_time))
            sys.stderr.write(self.get_log_string())

    def parseArguments(self):
        """
        Method that returns a dictionary with the values of the arguments.

        Keys are the option names without the trailing "="; values are the
        declared defaults overridden by the command line. An option given
        without a value (or with an empty one) is stored as the integer 1.

        Raises ValueError when getopt rejects the command line. Prints the
        help and exits with status 2 on --help or when a compulsory argument
        is still None after parsing.
        """

        arguments = self.compulsory_arguments + self.optional_arguments

        # Set all default values
        return_dict = dict((spec[0].replace("=", ""), spec[1]) for spec in arguments)

        # getopt needs the names with their "=" marker
        list_arguments = [spec[0] for spec in arguments]

        # NOTE(review): the first two entries of sys.argv are skipped;
        # presumably the script is invoked with one leading positional
        # argument -- verify against the caller.
        try:
            opts, _ = getopt.getopt(sys.argv[2:], "", list_arguments)
        except getopt.GetoptError as bad_opt:
            raise ValueError("%s\n" % (bad_opt.__str__()))

        for option, value in opts:
            if option == "--help":
                self.print_help()
                sys.exit(2)

            # getopt reports long options as "--" + full option name
            name = option[2:]
            if name in return_dict:
                if value == "":
                    return_dict[name] = 1
                else:
                    return_dict[name] = value

        # Check for all compulsory arguments
        for comp_arg in self.compulsory_arguments:
            name = comp_arg[0].replace("=", "")
            if return_dict[name] is None:
                sys.stderr.write("%s argument is not defined!\n" % (name))
                self.print_help()
                sys.exit(2)

        return return_dict
class BianaParser(object):
    """
    General parser base class for BIANA.

    Holds the command-line options common to all database parsers, parses
    them from sys.argv and runs the parsing life cycle (connect, register
    the external database, parse, close or roll back).

    self.parse_database(), self.print_help() and self.get_log_string() are
    called here but are not defined in this block; they must be provided
    elsewhere (for instance by the concrete parser subclass).
    """

    def __init__(self,
                 default_db_description=None,
                 default_script_name="bianaParser.py",
                 default_script_description="This file implements a program that fills up tables in database biana with information from distinct databases",
                 additional_compulsory_arguments=[],
                 additional_optional_arguments=[]):
        """
        Starts the bianaParser object: builds the option tables, parses the
        command line and stores the parsed values as attributes.

        default_db_description: default value of --database-description
        default_script_name: stored in self.script_name
        default_script_description: stored in self.script_description
        additional_compulsory_arguments: extra (name, default, help) tuples
            appended to the compulsory options
        additional_optional_arguments: extra (name, default, help) tuples
            appended to the optional options

        The list defaults are only read (copied with extend), never mutated.

        An option is a (name, default, help) tuple; a trailing "=" in the
        name marks an option that takes a value (getopt long-option syntax).
        """

        print("Parser object started")

        self.compulsory_arguments = [
            ("input-identifier=", None, "path or file name of input file(s) containing database data. Path names must end with \"/\"."),
            ("biana-dbname=", None, "name of database biana to be used"),
            ("biana-dbhost=", None, "name of host where database biana to be used is placed"),
            ("database-name=", None, "internal identifier name to this database (it must be unique in the database)"),
            ("database-version=", None, "version of the database to be inserted"),
        ]
        self.compulsory_arguments.extend(additional_compulsory_arguments)

        # NOTE(review): the defaults of biana-dbuser / biana-dbpass were
        # unreadable in the source this was restored from; None is assumed
        # (both are described as "not required") -- confirm.
        self.optional_arguments = [
            ("biana-dbuser=", None, "username accessing the database (not required in most systems)"),
            ("biana-dbpass=", None, "password of username accessing the database (not required in most systems"),
            ("help", None, "prints this message and exits"),
            ("verbose", 0, "prints process info to stdout"),
            ("log-file=", None, "Prints a log file of the parsing result (number of inserted proteins, references...)"),
            ("time-control", None, "prints to stderr a control of the timing of the parser"),
            ("database-description=", default_db_description, "Description of the database to be inserted."),
            ("optimize-for-parsing", None, "Optimizes database for parsing"),
            ("promiscuous", False, "sets the database to be parsed as promiscuous (whose entities can be included in multi user entities)"),
        ]
        self.optional_arguments.extend(additional_optional_arguments)

        # Set before parseArguments(), which may call print_help() (defined
        # outside this block).
        self.script_name = default_script_name
        self.script_description = default_script_description

        # Parse general arguments
        self.arguments_dic = self.parseArguments()

        self.input_file = self.arguments_dic["input-identifier"]
        self.biana_dbname = self.arguments_dic["biana-dbname"]
        self.biana_dbhost = self.arguments_dic["biana-dbhost"]
        self.sourcedb_name = self.arguments_dic["database-name"]
        self.sourcedb_version = self.arguments_dic["database-version"]
        self.biana_dbuser = self.arguments_dic["biana-dbuser"]
        self.biana_dbpass = self.arguments_dic["biana-dbpass"]
        self.help = self.arguments_dic["help"]
        self.verbose = self.arguments_dic["verbose"]
        self.time_control = self.arguments_dic["time-control"]
        self.log_file = self.arguments_dic["log-file"]
        self.optimize_for_parsing = self.arguments_dic["optimize-for-parsing"]

        # Flag deciding whether the database gives information that is going
        # to be added to more than one user entity
        self.is_promiscuous = self.arguments_dic["promiscuous"]

        self.database = None

        # Default externalEntityAttribute; present only when a parser
        # declared a "default-attribute" option among its additional ones.
        if "default-attribute" in self.arguments_dic:
            self.default_eE_attribute = self.arguments_dic["default-attribute"]
        else:
            self.default_eE_attribute = ""

        # Lower-cased attribute identifier -> maximum accepted value length.
        # Attributes with an unrecognised SQL type get no entry.
        self.attribute_identifier_to_max_value_length = {}
        attribute_types = (EXTERNAL_ENTITY_IDENTIFIER_ATTRIBUTES
                           + EXTERNAL_ENTITY_VERSIONABLE_IDENTIFIER_ATTRIBUTE_TYPES
                           + EXTERNAL_ENTITY_DESCRIPTIVE_SEARCHABLE_ATTRIBUTE_TYPES
                           + EXTERNAL_ENTITY_DESCRIPTIVE_ATTRIBUTE_TYPES
                           + EXTERNAL_ENTITY_NUMERIC_ATTRIBUTE_TYPES)
        for key, value in attribute_types:
            max_length = self._max_value_length(value)
            if max_length is not None:
                self.attribute_identifier_to_max_value_length[key.lower()] = max_length

    @staticmethod
    def _max_value_length(sql_type):
        """
        Returns the maximum number of characters accepted for a value of the
        given SQL column type string, or None if the type is not recognised.
        """

        sql_type = sql_type.lower()

        # Types with an explicit character count: "varchar(255)", ...
        for prefix in ("varchar(", "char(", "text("):
            if sql_type.startswith(prefix):
                return int(sql_type[len(prefix):].rstrip(")"))

        if sql_type.startswith("integer("):
            if sql_type.endswith(" unsigned"):
                sql_type = sql_type[:-len(" unsigned")]
            # NOTE(review): N in "integer(N)" is used as a number of bytes
            # (digits of 256**N) -- confirm that this is what the type
            # declarations mean.
            return len(str((2**8)**int(sql_type[len("integer("):].rstrip(")"))))

        if sql_type.startswith("integer"):
            return len(str((2**8)**4))

        if sql_type.startswith("smallint"):
            return len(str((2**8)**2))

        if sql_type.startswith("text"):
            return 999999

        return None

    def verify_attribute_length(self, attribute_identifier, attribute_value):
        """
        Writes a warning to stderr when attribute_value is empty or longer
        than the maximum length registered for attribute_identifier.

        The lookup is case-insensitive (keys are stored lower-cased);
        identifiers with no registered maximum only get the empty-value
        check. Returns nothing and never raises for a failed check.
        """

        # Non-string values (e.g. numbers) are measured on their string
        # representation.
        if not hasattr(attribute_value, "lower"):
            attribute_value = str(attribute_value)
        attribute_value = attribute_value.lower()
        length = len(attribute_value)

        max_length = self.attribute_identifier_to_max_value_length.get(attribute_identifier.lower())

        if max_length is not None and length > max_length:
            sys.stderr.write("\n%s longer than expected: %s\n" % (attribute_identifier, attribute_value))
        elif length == 0:
            sys.stderr.write("\n%s has 0 length: %s\n" % (attribute_identifier, attribute_value))

    def start(self):
        """
        Runs the parser: connects to the biana database, registers the
        external database being parsed, calls self.parse_database() and
        closes the connection.

        Exits with status 1 when the database name or version was given
        without a value, or when anything fails while parsing (the database
        access object is then asked to roll back).
        """

        print("Parser started")

        # parseArguments() stores the integer 1 for options given without a
        # value, so an int here means the name or version is missing.
        if isinstance(self.sourcedb_name, int) or isinstance(self.sourcedb_version, int):
            sys.stderr.write("You must insert correctly the database name and database version\n")
            sys.exit(1)

        self.database_description = self.arguments_dic["database-description"]

        # Log dictionary where all log information will be stored
        self.log = {}

        if self.log_file:
            self.log_file_fd = open(self.log_file, 'w')

        self.biana_access = BianaDBaccess(dbname=self.biana_dbname,
                                          dbhost=self.biana_dbhost,
                                          dbuser=self.biana_dbuser,
                                          use_buffer=True,
                                          dbpassword=self.biana_dbpass,
                                          lock_tables=True,
                                          check_integrity=True)  # check data consistency

        # Time related
        self.initial_time = time.time()

        # Introduce the information of the parsed database into biana.
        # NOTE(review): split(os.sep)[-1] is "" when input_file ends with the
        # path separator.
        self.database = ExternalDatabase(databaseName=self.sourcedb_name,
                                         databaseVersion=self.sourcedb_version,
                                         databaseFile=self.input_file.split(os.sep)[-1],
                                         databaseDescription=self.database_description,
                                         defaultExternalEntityAttribute=self.default_eE_attribute,
                                         isPromiscuous=self.is_promiscuous)

        self.biana_access.insert_new_external_database(externalDatabase=self.database)

        # Opening the input file is a responsibility of parse_database()
        try:
            if self.optimize_for_parsing:
                self.biana_access.optimize_database_for(mode="parsing")

            self.parse_database()

            # Set the parsing time
            self.database.set_parsing_time(int(time.time() - self.initial_time))

            # Updates the information that this external database has inserted
            self.biana_access.update_external_database_external_entity_attributes(self.database)

            self.close()

        except BaseException:
            # Deliberately as broad as the former bare "except:": an
            # interrupt or a sys.exit() raised while parsing must also
            # trigger the rollback.
            traceback.print_exc()
            sys.stderr.write("ERROR WHILE PARSING. ALL MODIFICATIONS ARE GOING TO BE DELETED\n")
            self.biana_access._rollback()
            sys.exit(1)

    def close(self):
        """
        Last step of a run: closes the database connection and emits the
        optional timing and log reports.
        """

        # As bianaDBaccess uses an internal buffer, it is necessary to close
        # the connection to be sure that all inserts are correctly done, as
        # well as to unlock tables.
        self.biana_access.close()

        if self.time_control:
            sys.stderr.write("Total time: %s seconds\n" % (time.time() - self.initial_time))

        if self.log_file:
            self.log_file_fd.write(self.get_log_string())
            self.log_file_fd.close()

        if self.verbose:
            sys.stderr.write("\n Total time: %s \n" % (time.time() - self.initial_time))
            sys.stderr.write(self.get_log_string())

    def parseArguments(self):
        """
        Method that returns a dictionary with the values of the arguments.

        Keys are the option names without the trailing "="; values are the
        declared defaults overridden by the command line. An option given
        without a value (or with an empty one) is stored as the integer 1.

        Raises ValueError when getopt rejects the command line. Prints the
        help and exits with status 2 on --help or when a compulsory argument
        is still None after parsing.
        """

        arguments = self.compulsory_arguments + self.optional_arguments

        # Set all default values
        return_dict = dict((spec[0].replace("=", ""), spec[1]) for spec in arguments)

        # getopt needs the names with their "=" marker
        list_arguments = [spec[0] for spec in arguments]

        # NOTE(review): the first two entries of sys.argv are skipped;
        # presumably the script is invoked with one leading positional
        # argument -- verify against the caller.
        try:
            opts, _ = getopt.getopt(sys.argv[2:], "", list_arguments)
        except getopt.GetoptError as bad_opt:
            raise ValueError("%s\n" % (bad_opt.__str__()))

        for option, value in opts:
            if option == "--help":
                self.print_help()
                sys.exit(2)

            # getopt reports long options as "--" + full option name
            name = option[2:]
            if name in return_dict:
                if value == "":
                    return_dict[name] = 1
                else:
                    return_dict[name] = value

        # Check for all compulsory arguments
        for comp_arg in self.compulsory_arguments:
            name = comp_arg[0].replace("=", "")
            if return_dict[name] is None:
                sys.stderr.write("%s argument is not defined!\n" % (name))
                self.print_help()
                sys.exit(2)

        return return_dict
def start(self):
    """
    Runs the parser: connects to the biana database, registers the external
    database being parsed, calls self.parse_database() and closes the
    connection through self.close().

    Exits with status 1 when the database name or version is an int instead
    of a string, or when anything fails while parsing (the database access
    object is then asked to roll back).
    """

    print("Parser started")

    # NOTE(review): an int name/version presumably means the option was
    # given without a value -- confirm against the argument parser.
    if isinstance(self.sourcedb_name, int) or isinstance(self.sourcedb_version, int):
        sys.stderr.write("You must insert correctly the database name and database version\n")
        sys.exit(1)

    self.database_description = self.arguments_dic["database-description"]

    # Log dictionary where all log information will be stored
    self.log = {}

    if self.log_file:
        # open() instead of the Python-2-only file() builtin
        self.log_file_fd = open(self.log_file, 'w')

    self.biana_access = BianaDBaccess(dbname=self.biana_dbname,
                                      dbhost=self.biana_dbhost,
                                      dbuser=self.biana_dbuser,
                                      use_buffer=True,
                                      dbpassword=self.biana_dbpass,
                                      lock_tables=True,
                                      check_integrity=True)  # check data consistency

    # Time related
    self.initial_time = time.time()

    # Introduce the information of the parsed database into biana.
    # NOTE(review): split(os.sep)[-1] is "" when input_file ends with the
    # path separator.
    self.database = ExternalDatabase(databaseName=self.sourcedb_name,
                                     databaseVersion=self.sourcedb_version,
                                     databaseFile=self.input_file.split(os.sep)[-1],
                                     databaseDescription=self.database_description,
                                     defaultExternalEntityAttribute=self.default_eE_attribute,
                                     isPromiscuous=self.is_promiscuous)

    self.biana_access.insert_new_external_database(externalDatabase=self.database)

    # Opening the input file is a responsibility of parse_database()
    try:
        if self.optimize_for_parsing:
            self.biana_access.optimize_database_for(mode="parsing")

        self.parse_database()

        # Set the parsing time
        self.database.set_parsing_time(int(time.time() - self.initial_time))

        # Updates the information that this external database has inserted
        self.biana_access.update_external_database_external_entity_attributes(self.database)

        self.close()

    except BaseException:
        # Deliberately as broad as the former bare "except:": an interrupt
        # or a sys.exit() raised while parsing must also trigger the
        # rollback.
        traceback.print_exc()
        sys.stderr.write("ERROR WHILE PARSING. ALL MODIFICATIONS ARE GOING TO BE DELETED\n")
        self.biana_access._rollback()
        sys.exit(1)
def start(self):
    """
    Runs the parser: connects to the biana database, registers the external
    database being parsed, calls self.parse_database() and finishes with
    self.close().

    Exits with status 1 when the database name or version is an int instead
    of a string, or when anything fails while parsing (in which case the
    database access object is asked to roll back).
    """

    print("Parser started")

    # NOTE(review): an int name/version presumably means the option was
    # given without a value -- confirm against the argument parser.
    if isinstance(self.sourcedb_name, int) or isinstance(self.sourcedb_version, int):
        sys.stderr.write("You must insert correctly the database name and database version\n")
        sys.exit(1)

    self.database_description = self.arguments_dic["database-description"]

    # Log dictionary where all log information will be stored
    self.log = {}

    if self.log_file:
        # open() instead of the Python-2-only file() builtin
        self.log_file_fd = open(self.log_file, 'w')

    self.biana_access = BianaDBaccess(dbname=self.biana_dbname,
                                      dbhost=self.biana_dbhost,
                                      dbuser=self.biana_dbuser,
                                      use_buffer=True,
                                      dbpassword=self.biana_dbpass,
                                      lock_tables=True,
                                      check_integrity=True)  # check data consistency

    # Time related
    self.initial_time = time.time()

    # Introduce the information of the parsed database into biana.
    # NOTE(review): split(os.sep)[-1] is "" when input_file ends with the
    # path separator.
    self.database = ExternalDatabase(databaseName=self.sourcedb_name,
                                     databaseVersion=self.sourcedb_version,
                                     databaseFile=self.input_file.split(os.sep)[-1],
                                     databaseDescription=self.database_description,
                                     defaultExternalEntityAttribute=self.default_eE_attribute,
                                     isPromiscuous=self.is_promiscuous)

    self.biana_access.insert_new_external_database(externalDatabase=self.database)

    # Opening the input file is a responsibility of parse_database()
    try:
        if self.optimize_for_parsing:
            self.biana_access.optimize_database_for(mode="parsing")

        self.parse_database()

        # Set the parsing time
        self.database.set_parsing_time(int(time.time() - self.initial_time))

        # Updates the information that this external database has inserted
        self.biana_access.update_external_database_external_entity_attributes(self.database)

        self.close()

    except BaseException:
        # Deliberately as broad as the former bare "except:": an interrupt
        # or a sys.exit() raised while parsing must also trigger the
        # rollback.
        traceback.print_exc()
        sys.stderr.write("ERROR WHILE PARSING. ALL MODIFICATIONS ARE GOING TO BE DELETED\n")
        self.biana_access._rollback()
        sys.exit(1)