Beispiel #1
0
 def __init__(self, species, download=False, withEns=True):
     """Collect the source data for ``species`` and prepare domain bookkeeping.

     ``download`` and ``withEns`` are forwarded unchanged to
     ``Collector.collectAll``.
     """
     # Plain attributes that do not depend on the collected data.
     self.species = species
     self.dbName = None
     self.TranscriptNoProteinRec = {}
     self.DomainsSourceDB = 'DB_merged.sqlite'
     # Build the collector and let it gather everything up front.
     self.data = Collector(species)
     self.data.collectAll(download=download, withEns=withEns)
     self.DomainOrg = DomainOrganizer()
Beispiel #2
0
class Pusher:
    """Pushes a timestamped row of collected values to a sheet.

    Two collectors (built from ``config.DB`` and ``config.DB2``) supply the
    values; a ``SheetItf`` built from ``config.GOOGLE`` receives them.
    """

    def __init__(self, config):
        """Create both collectors and the sheet interface from ``config``."""
        self.c1 = Collector(config.DB)
        self.c2 = Collector(config.DB2)
        google = config.GOOGLE
        self.s1 = SheetItf(google['credentials'], google['scope'],
                           google['key'], google['sheet'])

    def push(self):
        """Append date/time and both collectors' output to the row, then push it."""
        sheet = self.s1
        stamp = [str(time.strftime(fmt)) for fmt in ("%d/%m/%Y", "%H:%M:%S")]
        sheet.addToRow(stamp)
        for source in (self.c1, self.c2):
            sheet.addToRow(source.collect())
        print("pushing:", sheet.nextRow)
        sheet.pushRow()
Beispiel #3
0
def collect_stock(max_attempts=3):
    """Collect the daily data, retrying when an attempt fails.

    Args:
        max_attempts: Total number of attempts (default 3, matching the
            original try / retry / retry behaviour). Must be at least 1.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        Exception: Whatever the final attempt raised, once all attempts
            have failed.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    collector = Collector()
    for attempt in range(1, max_attempts + 1):
        try:
            collector.collect_daily_data()
            return
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # Ctrl-C / SystemExit are not swallowed and retried.
            if attempt == max_attempts:
                raise
Beispiel #4
0
 def runTest(self):
     """Submit one crash and check the remote crash entry count grows by one."""
     collector = Collector(
         self.tmpCacheDir,
         serverHost='127.0.0.1',
         serverPort='8000',
         serverProtocol='http',
         serverUser=testAuthCreds[0],
         serverPass=testAuthCreds[1],
         clientId='test-fuzzer1',
     )

     config = ProgramConfiguration("mozilla-central", "x86-64", "linux",
                                   version="ba0bc4f26681")
     traceLines = asanTraceCrash.splitlines()
     crashInfo = CrashInfo.fromRawCrashData([], traceLines, config)

     # TODO: This only verifies that *something* was submitted; the content
     # of the submitted entry is not checked.
     countBefore = self.getRemoteCrashEntryCount()
     collector.submit(crashInfo, exampleTestCase)
     countAfter = self.getRemoteCrashEntryCount()
     self.assertEqual(countAfter, countBefore + 1)
Beispiel #5
0
def collect_data(mode):
    """Run the capture loop that collects gesture images for ``mode``.

    Each frame shows the mode, a per-gesture file counter and the region of
    interest; key presses are handed to the collector together with the ROI
    image. Pressing ESC stops the video, after which the camera and all
    OpenCV windows are released.
    """
    ROI_NAME = 'Region of Interest'
    ESC_KEY = 27

    video = videoCapture()
    collector = Collector(mode)
    directory = collector.getDirectory()
    frame = video.getMainFrame()

    print(color("Data collecting started...", Colors.yellow))
    print(color("video windows is opening ", Colors.yellow))
    collector.generateFolders()

    while video.isRunning():
        video.update()

        # Overlay: current mode, then one "<label>: <file count>" line per gesture.
        video.addRightText(mode)
        for gesture in gestures:
            label = gestures[gesture]
            fileCount = filesCounter(directory + str(gesture))[0]
            video.addRightText(label + ': ' + str(fileCount))

        # The four ROI coordinates are looked up one by one, in order.
        video.addFrame(ROI_NAME, *(ROIcoordinates(frame)[i] for i in range(4)))
        video.display()

        pressedKey = cv2.waitKey(10)
        region = video.getFrameRegion(ROI_NAME)
        collector.keyPressToImage(region, pressedKey)
        if (pressedKey & 0xFF) == ESC_KEY:
            video.stop()

    video.releaseCamera()
    cv2.destroyAllWindows()
    print(color("Data collecting finished", Colors.yellow))
 def work(self, login, password, proxy=None, first=False):
     """Create a ``Collector`` for the given account and wrap it in a ``VkWorker``.

     Every worker except the first one waits a second before starting, so
     that the first worker gets the chance to create the database.
     """
     if not first:
         time.sleep(1)
     print("Spawning thread {}, proxy {}".format(login, proxy))
     # Imported here on purpose: psycopg cannot share one session between
     # different processes, so the import happens inside the worker.
     from Collector import Collector
     worker = VkWorker(Collector(login, password, proxy))
Beispiel #7
0
    def runTest(self):
        """Refresh signatures from the server and parse every file received.

        The test is skipped when the cache directory is empty after the
        refresh, i.e. the server provided no signatures.
        """
        collector = Collector(
            self.tmpCacheDir,
            serverHost='127.0.0.1',
            serverPort='8000',
            serverProtocol='http',
            serverUser=testAuthCreds[0],
            serverPass=testAuthCreds[1],
            clientId='test-fuzzer1',
        )
        collector.refresh()

        sigFiles = os.listdir(self.tmpCacheDir)
        for sigFile in sigFiles:
            # Parsing must succeed for every downloaded signature file.
            CrashSignature.fromFile(os.path.join(self.tmpCacheDir, sigFile))

        if not sigFiles:
            self.skipTest("Server did not provide signatures")
Beispiel #8
0
 def runTest(self):
     """Refresh the signature cache and load each signature file from it.

     Skips the test if no signature file was found after the refresh.
     """
     connection = dict(serverHost='127.0.0.1',
                       serverPort='8000',
                       serverProtocol='http',
                       serverUser=testAuthCreds[0],
                       serverPass=testAuthCreds[1],
                       clientId='test-fuzzer1')
     collector = Collector(self.tmpCacheDir, **connection)
     collector.refresh()

     parsedCount = 0
     for sigFile in os.listdir(self.tmpCacheDir):
         CrashSignature.fromFile(os.path.join(self.tmpCacheDir, sigFile))
         parsedCount += 1

     if parsedCount == 0:
         self.skipTest("Server did not provide signatures")
Beispiel #9
0
 def __init__(self, startTime = 0, *args, **kwargs):
     """Set up the shared state, time context and collector of this object.

     ``startTime`` is an offset added to ``self.now()`` by the time context;
     all remaining positional and keyword arguments are passed on to the
     parent class constructor.
     """
     super().__init__(*args, **kwargs)
     self.sharedObjects = {}
     self.startTime = startTime
     # The context evaluates its time lazily as now() + startTime, so a
     # later change of self.startTime is picked up as well.
     self.xcontext = XValueContext(lambda: self.now() + self.startTime)
     # Shortcut to the context's ``t`` attribute.
     self.t = self.xcontext.t
     self.collector = Collector()
     self.tcounter = 0
     # NOTE(review): initialize() runs before xvalues and logging are set
     # below, so anything it stores in those attributes is overwritten --
     # confirm this ordering is intended.
     self.initialize()
     self.xvalues = {}
     self.logging = True
Beispiel #10
0
 def __init__(self, binsAmount):
     """Create the simulation window, its map elements and helper objects.

     ``binsAmount`` is stored on the instance before the map-building
     helpers (``addDumps``, ``addRoads``, ``addBins``, ``addGrass``) run.
     """
     # Grid dimensions in fields and the edge length of one field; the
     # canvas below is sized as fieldSize * grid dimension (presumably
     # pixels -- confirm).
     self.gridWidth = 20
     self.gridHeight = 9
     self.fieldSize = 64
     self.window = Tk()
     self.canvas = Canvas(self.window,
                          width=self.fieldSize * self.gridWidth,
                          height=self.fieldSize * self.gridHeight)
     self.binsAmount = binsAmount
     self.window.title("Simulation")
     # NOTE(review): the meaning of the three Collector arguments is not
     # visible here -- confirm against the Collector class.
     self.collector = Collector(1, 1, 1)
     self.positionsToVisit = []
     self.mapElements = []
     # Map-building helpers; they run after mapElements has been created
     # and before MovementLogic receives it.
     self.addDumps()
     self.addRoads()
     self.addBins()
     self.addGrass()
     self.MovementLogic = MovementLogic(self.mapElements, self.gridWidth,
                                        self.gridHeight)
     self.classifier = GarbageClassifier("learningExamples.txt")
Beispiel #11
0
    def run(self):
        """Start a collector, tell every executor to run, and return the results.

        Polls ``self._collector_ready`` every 0.1 s until it becomes true,
        then stops the collector and returns ``self._res_list``.
        """
        self._collector_ready = False
        self._res_list = []

        executor_count = self._num_executors
        collector_ref = Collector.start(executor_count, self)
        for position in range(executor_count):
            executor = self._executors[position]
            executor.tell({'type': 'run', 'collector': collector_ref})

        # Busy-wait until the ready flag is set.
        while True:
            if self._collector_ready:
                break
            time.sleep(0.1)

        collector_ref.stop()
        return self._res_list
Beispiel #12
0
    def runTest(self):
        """Submit a crash to the server and expect exactly one more remote entry."""
        connection = dict(serverHost='127.0.0.1',
                          serverPort='8000',
                          serverProtocol='http',
                          serverUser=testAuthCreds[0],
                          serverPass=testAuthCreds[1],
                          clientId='test-fuzzer1')
        collector = Collector(self.tmpCacheDir, **connection)

        config = ProgramConfiguration("mozilla-central", "x86-64", "linux",
                                      version="ba0bc4f26681")
        crashInfo = CrashInfo.fromRawCrashData(
            [], asanTraceCrash.splitlines(), config)

        # TODO: Rudimentary check only -- it shows that *something* was
        # submitted, not that the submitted information is correct.
        expectedCount = self.getRemoteCrashEntryCount() + 1
        collector.submit(crashInfo, exampleTestCase)
        self.assertEqual(self.getRemoteCrashEntryCount(), expectedCount)
def collect_data(outfilename, measurement_time=default_time):
    """Record packets from a ``Collector`` queue into four text files.

    The files are ``outfilename`` + ``_encoder.dat``, ``_irig.dat``,
    ``_timeout.dat`` and ``_error.dat``; each starts with a ``#`` column
    header line. The collector queue is drained repeatedly, every queue item
    is split into packets by their header word, and the extracted frames are
    appended to the matching file and flushed after each pass.

    Args:
        outfilename: Path prefix of the four output files.
        measurement_time: Minimum run time in seconds. It is checked after
            each drain pass, so the actual run time can be slightly longer.

    Returns:
        Always ``0``.

    The output files are closed and the collector is stopped even when an
    exception interrupts the measurement.
    """
    encoder_header = 0x1EAF
    irig_header = 0xCAFE
    timeout_header = 0x1234
    error_header = 0xE12A

    collector = Collector(port, isTCP=isTCP)
    try:
        time.sleep(1)

        with open(outfilename + '_encoder.dat', 'w') as outfile_encoder, \
                open(outfilename + '_irig.dat', 'w') as outfile_irig, \
                open(outfilename + '_timeout.dat', 'w') as outfile_timeout, \
                open(outfilename + '_error.dat', 'w') as outfile_error:
            outfile_encoder.write('#TIME ERROR DIRECTION TIMERCOUNT REFERENCE\n')
            outfile_irig.write('#TIMERCOUNT YEAR DAY HOUR MINUTE SECOND\n')
            outfile_timeout.write('#TIME TYPE\n')
            outfile_error.write('#TIME ERRORCODE\n')
            outfiles = (outfile_encoder, outfile_irig,
                        outfile_timeout, outfile_error)
            for outfile in outfiles:
                outfile.flush()

            encoder_extractor = EncoderExtractor(encoder_header)
            irig_extractor = IrigExtractor(irig_header)
            timeout_extractor = TimeoutExtractor(timeout_header)
            error_extractor = ErrorExtractor(error_header)

            # header word -> (label for debug output, extractor, packet size)
            packet_kinds = {
                encoder_header: ('encoder', encoder_extractor,
                                 encoder_extractor.pi.total_bytesize),
                irig_header: ('irig', irig_extractor,
                              irig_extractor.pi.total_bytesize),
                timeout_header: ('timeout', timeout_extractor,
                                 timeout_extractor.pi.total_bytesize),
                error_header: ('error', error_extractor,
                               error_extractor.pi.total_bytesize),
            }

            # The header layout is taken from the encoder extractor and used
            # for every packet kind.
            header_format = "%s%s" % (endian, encoder_extractor.pi.header_str)
            header_bytesize = encoder_extractor.pi.header_bytesize

            start_time = time.time()

            while True:
                frames = {kind: [] for kind in packet_kinds}

                # Drain what is currently queued. qsize() is only an
                # estimate, hence the blocking get() below.
                approx_size = collector.queue.qsize()
                if approx_size > 0 and verbose > 0:
                    print('approximate size = {}'.format(approx_size))

                for _ in range(approx_size):
                    # block=True, timeout=None: wait until an item arrives.
                    data = collector.queue.get(block=True, timeout=None)
                    data_len = len(data)
                    if verbose > 0:
                        print('obtained data size = {}'.format(data_len))

                    parse_index = 0
                    while parse_index < data_len:
                        raw_header = data[parse_index:parse_index + header_bytesize]
                        if verbose > 0:
                            print('parse_index = {} / data_len = {}'.format(parse_index, data_len))
                            # The original compared this slice with the int 0,
                            # which is never equal, so it was always printed.
                            print('obtained header (size) = {} ({})'.format(raw_header, len(raw_header)))
                        header = struct.unpack(header_format, raw_header)[0]

                        if header in packet_kinds:
                            label, extractor, bytesize = packet_kinds[header]
                            if verbose > 0:
                                print('  header == {}'.format(label))
                            frames[header].append(extractor.extract(data, parse_index))
                            parse_index += bytesize
                        elif header == 0:
                            # Zero word: skip one header width and keep scanning.
                            if verbose > 1:
                                print('  header == 0')
                            parse_index += header_bytesize
                        else:
                            # Unknown header: report it and drop the rest of
                            # this queue item (the run itself continues).
                            print("Bad header! This is not encoder/irig/timeout/error header! : %s" % (str(header)))
                            if verbose > 0:
                                print('###get data###')
                                print(data)
                                print('##############')
                            break

                currenttime = int(time.time())
                for frame in frames[encoder_header]:
                    ncount = len(frame['timercount'])
                    for i in range(ncount):
                        outfile_encoder.write('{} {} {} {} {}\n'.format(
                            currenttime, 1 - frame['error'][i], frame['quad'][i],
                            frame['timercount'][i], frame['position'][i]))
                for frame in frames[irig_header]:
                    outfile_irig.write('{} {} {} {} {} {}\n'.format(
                        frame['timercount'], frame['year'], frame['day'],
                        frame['hour'], frame['minute'], frame['second']))
                for frame in frames[timeout_header]:
                    outfile_timeout.write('{} {}\n'.format(currenttime, frame['type']))
                for frame in frames[error_header]:
                    outfile_error.write('{} {}\n'.format(currenttime, frame['error']))

                # Flush every pass so the files stay readable during the run.
                for outfile in outfiles:
                    outfile.flush()

                stop_time = time.time()
                if stop_time - start_time >= measurement_time:
                    print(f'{measurement_time} sec have passed and stopped UDP')
                    break
    finally:
        collector.stop()

    return 0
Beispiel #14
0
    def doExperiment(self, index):
        """Run one classification experiment chosen by ``index``.

        ``index`` selects the experiment: 2 = stop-word filtering,
        3 = word-length filtering, 4 = infrequent-word removal,
        5 = changed delta; any other value runs the baseline.
        Training data is read from ``./train`` and test data from ``./test``
        (relative to the current working directory); results go to an
        experiment-specific sub-directory.

        Experiments 4 and 5 ask for a number on stdin. Empty, non-numeric or
        out-of-range input falls back to the default named in the prompt
        instead of raising ``ValueError``.
        """

        def askNumber(prompt, cast, low, high, default):
            # Read one number; use ``default`` when the input cannot be
            # parsed or lies outside [low, high].
            try:
                value = cast(input(prompt))
            except ValueError:
                return default
            return value if low <= value <= high else default

        collector = Collector()
        path = os.getcwd()
        trainPath = os.path.join(path, 'train')
        testPath = os.path.join(path, 'test')

        if index == 2:
            print('you choose stopWord experiment')
            print('please wait...')
            fileName = os.path.join(path, 'stopWord.txt')
            with open(fileName, 'r', encoding='gb18030') as f:
                collector.stopWord += f.read().split('\n')

            resultPath = os.path.join(path, 'stopWord')
            modelName = 'demo-model-exp2.txt'
            resultName = 'demo-result-exp2.txt'
            self.doExpClassify(trainPath, testPath, resultPath, collector,
                               modelName, resultName)
        elif index == 3:
            print('you choose word length experiment')
            print('please wait...')
            collector.removeWordLength[0] = 2
            collector.removeWordLength[1] = 9
            resultPath = os.path.join(path, 'wordLength')
            modelName = 'demo-model-exp3.txt'
            resultName = 'demo-result-exp3.txt'
            self.doExpClassify(trainPath, testPath, resultPath, collector,
                               modelName, resultName)
        elif index == 4:
            print('you choose infrequent experiment')
            choose = input(
                'please choose value(0) or percentage(1),default is value')
            if choose == '1':
                percent = askNumber(
                    'please enter the percentage(%)(5%-25%),default is 5%: ',
                    int, 5, 25, 5)
                # infrequentProcess receives the percentage as a fraction.
                i = float(percent / 100)
            else:
                i = askNumber(
                    'please enter the value(1-20), defalut is 1: ',
                    int, 1, 20, 1)
            print('please wait...')
            resultPath = os.path.join(path, 'infrequent')
            curCollector = Collector()
            modelName = 'demo-model-exp4.txt'
            resultName = 'demo-result-exp4.txt'

            curCollector.dataCollector(trainPath)
            curCollector.infrequentProcess(i)
            curCollector.buildModel(resultPath, modelName)
            curCollector.doClassify(testPath, resultPath, resultName)

        elif index == 5:
            print('you choose delta change experiment')
            newDelta = askNumber(
                'please choose the value of delta(0.1-1), default is 0.5: ',
                float, 0.1, 1, 0.5)
            print('please wait...')

            resultPath = os.path.join(path, 'delta')
            curCollector = Collector()
            curCollector.delta = newDelta
            modelName = 'demo-model-exp5.txt'
            resultName = 'demo-result-exp5.txt'
            self.doExpClassify(trainPath, testPath, resultPath, curCollector,
                               modelName, resultName)

        else:
            print('you choose baseLine experiment')
            print('please wait...')
            resultPath = os.path.join(path, 'baseLine')
            modelName = 'demo-model-base.txt'
            resultName = 'demo-result-base.txt'
            self.doExpClassify(trainPath, testPath, resultPath, collector,
                               modelName, resultName)
Beispiel #15
0
    # Each score represents level of confidence for each of the objects.
    detection_scores = inference.get_model_detection_scores()
    detection_classes = inference.get_model_detection_classes()

    # Number of objects detected.
    num_detections = inference.get_model_detected_objects()

    camera = PiCamera()
    camera.resolution = (IM_WIDTH, IM_HEIGHT)
    camera.framerate = 10
    rawCapture = PiRGBArray(camera, size=(IM_WIDTH, IM_HEIGHT))
    rawCapture.truncate(0)

    # Initialize frame rate calculation
    collector = Collector()
    frame_rate_calc = collector.frame_rate_calc
    freq = collector.freq
    font = collector.font

    # Initialize publisher service.
    publisher = Publisher()

    for frame1 in camera.capture_continuous(rawCapture,
                                            format="bgr",
                                            use_video_port=True):

        t1 = cv2.getTickCount()

        # Acquire frame and expand frame dimensions to have shape:
        # [1, None, None, 3]
Beispiel #16
0
from datetime import datetime

df = pd.read_csv("dataset.csv")

# precips.txt gets one line per processed row, so its current line count is
# the number of rows to skip when resuming an interrupted run.
with open("precips.txt") as temp:
    skip = len(temp.readlines())
print(skip)

for idx, (_, row) in enumerate(df.iterrows()):
    if idx < skip:
        continue
    lat = str(row.BEGIN_LAT)
    lon = str(row.BEGIN_LON)
    # Swap the second and third "/"-separated fields of the date.
    parts = row.BEGIN_DATE.split("/")
    date = parts[0] + "/" + parts[2] + "/" + parts[1]
    c = Collector(lat, lon, date)
    data = c.getData()
    if data[0] == 'no' or data[1] == 'no':
        print("API RAN OUT: . Didn't get", idx)
        break
    if idx % 50 == 0:
        # Progress report every 50 rows.
        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")
        print(idx, current_time, " || ", date, lat, lon, data)
    # Open, append and close per row: each result reaches the disk right
    # away and no file handle is left open when the loop ends or breaks.
    with open("precips.txt", "a") as out:
        out.write(str(row.EVENT_ID) + ";;" + data[0] + ";;" + data[1] + "|||\n")
Beispiel #17
0
 def __init__(self, config):
     """Build the two collectors and the sheet interface described by ``config``."""
     self.c1 = Collector(config.DB)
     self.c2 = Collector(config.DB2)
     google = config.GOOGLE
     self.s1 = SheetItf(google['credentials'], google['scope'],
                        google['key'], google['sheet'])
# Command-line dispatcher: "collect [ISO date]", "train" or "predict".
# Nothing is started unless MASTER_SWITCH is 'on' in trading-data/.env.
import sys
from datetime import datetime

# Get utils module
import utils

# Setup environment variables
import Process
# NOTE(review): the env file handle is passed to Process.getInstance and not
# closed here, because it is not visible whether Process reads it lazily.
process = Process.getInstance(open('trading-data/.env', 'r'))

# A missing program argument is treated like an unknown one instead of
# raising IndexError.
program = sys.argv[1] if len(sys.argv) > 1 else None

if process.env['MASTER_SWITCH'] == 'on':
    if program == 'collect':
        from Collector import Collector
        if len(sys.argv) > 2:
            # An explicit target date was given: kick off the data collection
            # sequence for that date.
            target_date = utils.parseISODate(sys.argv[2])
            collector = Collector(target_date, False)
        else:
            # No date given: collect for the current moment.
            collector = Collector(datetime.now(), True)
        collector.start()
    elif program == 'train':
        from Learner import Learner
        learner = Learner(False)
        learner.start()
    elif program == 'predict':
        from Learner import Learner
        learner = Learner(True, False)
        learner.start()
    else:
        # print() with a single string behaves the same on Python 2 and 3;
        # the former print statement was a syntax error on Python 3.
        print('Invalid parameters given.  Shutting down...')
import configparser
import os

from Database.DBManager import DBManager
from Collector import Collector

# Locate settings.env next to this source file.
SOURCE_PATH = os.path.dirname(os.path.abspath(__file__))
CONFIG_FILE_NAME = "{}/settings.env".format(SOURCE_PATH)

print(CONFIG_FILE_NAME)

# Starting point: the same file is parsed twice, once by a ConfigParser and
# once by a RawConfigParser. TIME_FORMAT is read through the raw parser, the
# database settings through the regular one.
config = configparser.ConfigParser()
config_raw = configparser.RawConfigParser()
for _parser in (config, config_raw):
    _parser.read(CONFIG_FILE_NAME)

TIME_FORMAT = config_raw.get("Common", "TIME_FORMAT")

DB_HOST, DB_DATABASE, DB_USERNAME, DB_PASSWORD = (
    config.get("Database", option)
    for option in ("DB_HOST", "DB_DATABASE", "DB_USERNAME", "DB_PASSWORD")
)

db_manager = DBManager(DB_HOST, DB_USERNAME, DB_PASSWORD, DB_DATABASE)

Collector.collect(db_manager, TIME_FORMAT)
Beispiel #20
0
class dbBuilder:

    def __init__(self, species, download=False, withEns=True):
        """Collect the data for ``species`` and create the domain organizer.

        ``download`` is forwarded to both ``Collector.collectAll`` and
        ``DomainOrganizer``; ``withEns`` only to ``collectAll``.
        """
        # Simple attributes first; none of them depends on the collected data.
        self.species = species
        self.dbName = None
        self.TranscriptNoProteinRec = {}
        self.DomainsSourceDB = 'DB_merged.sqlite'
        # Gather all source data for the species up front.
        self.data = Collector(species)
        self.data.collectAll(download=download, withEns=withEns)
        self.DomainOrg = DomainOrganizer(download=download)

    def create_tables_db(self, merged=True, dbName=None):
        """
        Create a transcripts table in the specie database and fills with ucsc transcripts data
        """
        if dbName is not None:
            self.dbName = dbName
        elif merged:
            self.dbName = 'DB_merged'
        else:
            self.dbName = 'DB_' + self.species

        print("Creating database: {}...".format(self.dbName))
        with connect(self.dbName + '.sqlite') as con:
            cur = con.cursor()
            cur.executescript('DROP TABLE IF EXISTS Genes;')
            print('Creating the table: Genes')
            cur.execute('''
                        CREATE TABLE Genes(
                                gene_GeneID_id TEXT UNIQUE,
                                gene_ensembl_id TEXT UNIQUE,
                                gene_symbol TEXT,
                                synonyms TEXT,
                                chromosome TEXT,
                                strand TEXT,
                                specie TEXT, 
                                PRIMARY KEY(gene_GeneID_id, gene_ensembl_id, gene_symbol)
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS transcripts;")
            print('Creating the table: Transcripts')
            cur.execute('''
                        CREATE TABLE Transcripts(
                                transcript_refseq_id TEXT UNIQUE,
                                transcript_ensembl_id TEXT UNIQUE,
                                tx_start INTEGER,
                                tx_end INTEGER,
                                cds_start INTEGER,
                                cds_end INTEGER,
                                exon_count INTEGER,
                                gene_GeneID_id TEXT,                        
                                gene_ensembl_id TEXT,
                                protein_refseq_id TEXT,
                                protein_ensembl_id TEXT,
                                PRIMARY KEY (transcript_refseq_id, transcript_ensembl_id),
                                FOREIGN KEY(gene_GeneID_id, gene_ensembl_id) REFERENCES Genes(gene_GeneID_id, gene_ensembl_id),
                                FOREIGN KEY(protein_refseq_id,protein_ensembl_id) REFERENCES Proteins(protein_refseq_id,protein_ensembl_id)
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS Exons;")
            print('Creating the table: Exons')
            cur.execute('''
                        CREATE TABLE Exons(
                                gene_GeneID_id TEXT,
                                gene_ensembl_id TEXT,        
                                genomic_start_tx INTEGER,
                                genomic_end_tx INTEGER,
                                PRIMARY KEY (gene_GeneID_id, gene_ensembl_id, genomic_start_tx, genomic_end_tx),
                                FOREIGN KEY(gene_GeneID_id, gene_ensembl_id) REFERENCES Genes(gene_GeneID_id, gene_ensembl_id)
                                );'''
                        )

            cur.executescript("DROP TABLE IF EXISTS Transcript_Exon;")
            print('Creating the table: Transcript_Exon')
            cur.execute('''
                        CREATE TABLE Transcript_Exon(
                                transcript_refseq_id TEXT,
                                transcript_ensembl_id TEXT,
                                order_in_transcript INTEGER,
                                genomic_start_tx INTEGER,
                                genomic_end_tx INTEGER,
                                abs_start_CDS INTEGER,
                                abs_end_CDS INTEGER,
                                PRIMARY KEY(transcript_refseq_id, transcript_ensembl_id, order_in_transcript),
                                FOREIGN KEY(transcript_refseq_id, transcript_ensembl_id) 
                                 REFERENCES Transcripts(transcript_refseq_id, transcript_ensembl_id),
                                FOREIGN KEY(genomic_start_tx, genomic_end_tx)\
                                 REFERENCES Exons(genomic_start_tx, genomic_end_tx)
                                );'''
                        )

            cur.executescript("DROP TABLE IF EXISTS Proteins;")
            print('Creating the table: Proteins')
            cur.execute('''
                        CREATE TABLE Proteins(
                                protein_refseq_id TEXT UNIQUE,
                                protein_ensembl_id TEXT UNIQUE,
                                description TEXT,
                                synonyms TEXT,
                                length INTEGER,
                                transcript_refseq_id TEXT,
                                transcript_ensembl_id TEXT,
                                PRIMARY KEY(protein_refseq_id, protein_ensembl_id),
                                FOREIGN KEY(transcript_refseq_id, transcript_ensembl_id) REFERENCES Transcripts(transcript_refseq_id, transcript_ensembl_id)
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS DomainType;")
            print('Creating the table: DomainType')
            cur.execute('''
                        CREATE TABLE DomainType(
                                type_id INTEGER NOT NULL PRIMARY KEY UNIQUE,
                                name TEXT,
                                other_name TEXT,
                                description TEXT,
                                CDD_id TEXT,
                                cdd TEXT,
                                pfam TEXT,
                                smart TEXT,
                                tigr TEXT,
                                interpro TEXT
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS DomainEvent;")
            print('Creating the table: DomainEvent')
            cur.execute('''
                        CREATE TABLE DomainEvent(
                                protein_refseq_id TEXT,
                                protein_ensembl_id TEXT,
                                type_id INTEGER,
                                AA_start INTEGER,
                                AA_end INTEGER,
                                nuc_start INTEGER,
                                nuc_end INTEGER,
                                total_length INTEGER,
                                ext_id TEXT,
                                splice_junction BOOLEAN,
                                complete_exon BOOLEAN,
                                PRIMARY KEY(protein_refseq_id, protein_ensembl_id, type_id, AA_start, total_length),
                                FOREIGN KEY(type_id) REFERENCES DomainType(type_id),
                                FOREIGN KEY(protein_refseq_id, protein_ensembl_id) 
                                 REFERENCES Proteins(protein_refseq_id, protein_ensembl_id)
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS SpliceInDomains;")
            print('Creating the table: SpliceInDomains')
            cur.execute("""
                        CREATE TABLE SpliceInDomains(
                                transcript_refseq_id TEXT,
                                transcript_ensembl_id TEXT,
                                exon_order_in_transcript INTEGER,
                                type_id INTEGER,
                                total_length INTEGER,
                                domain_nuc_start INTEGER,
                                included_len INTEGER,
                                exon_num_in_domain INTEGER,
                                PRIMARY KEY (transcript_refseq_id, transcript_ensembl_id, exon_order_in_transcript, type_id,\
                                total_length, domain_nuc_start),
                                FOREIGN KEY(transcript_refseq_id, transcript_ensembl_id) 
                                 REFERENCES Transcripts(transcript_refseq_id, transcript_ensembl_id),
                                FOREIGN KEY(exon_order_in_transcript) REFERENCES Transcript_Exon(order_in_transcript),
                                FOREIGN KEY(type_id) REFERENCES DomainType(type_id),
                                FOREIGN KEY(domain_nuc_start, total_length) REFERENCES DomainEvent(Nuc_start, total_length)
                                );"""
                        )
            if merged:
                cur.executescript("DROP TABLE IF EXISTS Orthology;")
                print('Creating the table: Orthology')
                cur.execute("""
                            CREATE TABLE Orthology(
                                    A_ensembl_id TEXT,
                                    A_GeneSymb TEXT,
                                    A_species TEXT,
                                    B_ensembl_id TEXT,
                                    B_GeneSymb TEXT,
                                    B_species TEXT,
                                    PRIMARY KEY (A_ensembl_id, B_ensembl_id),
                                    FOREIGN KEY (A_ensembl_id, B_ensembl_id, A_GeneSymb, B_GeneSymb, A_species, B_species) 
                                    REFERENCES Genes(gene_ensembl_id, gene_ensembl_id, gene_symbol, gene_symbol, specie, specie)
                                    );"""
                            )
        # ~~~ disconnect database ~~~

    def create_index(self):
        """
        Create the indexes used for efficient searches in '<self.dbName>.sqlite'.

        Every index is created with IF NOT EXISTS, so calling this method more than
        once on the same database is harmless.
        @return: None
        """
        from contextlib import closing  # local import keeps this block self-contained

        index_statements = (
            '''CREATE INDEX IF NOT EXISTS geneTableIndexBySpecies ON Genes(specie);''',
            '''CREATE INDEX IF NOT EXISTS transcriptTableIndexByGene ON Transcripts(gene_GeneID_id) ;''',
            '''CREATE INDEX IF NOT EXISTS exonsInTranscriptsTableIndexByTranscripts ON Transcript_Exon(transcript_refseq_id) ;''',
            '''CREATE INDEX IF NOT EXISTS domainEventsTableIndexByProtein ON DomainEvent(protein_refseq_id) ;''',
            '''CREATE INDEX IF NOT EXISTS domainEventsTableIndexByEnsembl ON DomainEvent(protein_ensembl_id);''',
            '''CREATE INDEX IF NOT EXISTS exonInTranscriptsTableIndexByEnsembl ON Transcript_Exon(transcript_ensembl_id);''',
        )
        # `with con` only commits / rolls back; `closing` is what actually releases
        # the connection when the block ends.
        with closing(connect(self.dbName + '.sqlite')) as con, con:
            cur = con.cursor()
            for statement in index_statements:
                cur.execute(statement)

    def fill_in_db(self, CollectDomainsFromMerged=True, merged=True, dbName=None):
        """
        Fill the database with the data collected for a single species.

        Rows are written to the tables Transcripts, Genes, Transcript_Exon, Exons, Proteins,
        SpliceInDomains, DomainEvent and DomainType of '<dbName>.sqlite'.
        @param CollectDomainsFromMerged: if True, the domains stored in self.DomainsSourceDB are loaded
            into self.DomainOrg first, to keep domain ids consistent between the merged & single species db.
        @param merged: if True the database is 'DB_merged' (unless dbName is given) and the DomainType table
            is dropped and recreated before it is filled; if False a species unique db 'DB_<species>' is used.
        @param dbName: explicit database name (without the '.sqlite' suffix); overrides `merged` for naming.
        @return: None
        @raise ValueError: if a transcript refers to a gene that is missing from self.data.Genes.
        """
        from contextlib import closing  # local import keeps this block self-contained

        if dbName is not None:
            self.dbName = dbName
        elif merged:
            self.dbName = 'DB_merged'
        else:
            self.dbName = 'DB_' + self.species

        # Always bound, so merged=True also works with CollectDomainsFromMerged=False
        # (previously that combination raised NameError further down).
        preDomains = set()
        if CollectDomainsFromMerged:  # to keep domain ids consistent between the merged & single species db
            self.DomainOrg.collectDatafromDB(self.DomainsSourceDB)
            preDomains = set(self.DomainOrg.allDomains.keys())

        # `with con` commits / rolls back; `closing` actually closes the connection afterwards.
        with closing(connect(self.dbName + '.sqlite')) as con, con:
            print("Connected to " + self.dbName + "...")
            print("Filling in the tables...")
            cur = con.cursor()
            geneSet = set()  # GeneID / ensembl ids of genes already inserted
            uExon = set()  # exon rows already inserted (shared between transcripts of a gene)
            relevantDomains = set()  # domain type ids met while processing this species

            for tID, transcript in self.data.Transcripts.items():
                ensemblkey = tID.startswith("ENS")
                e_counts = len(transcript.exon_starts)
                # insert into Transcripts table
                if transcript.CDS is None:
                    print("Transcript {} from {} has None in CDS".format(tID, self.species))
                    transcript.CDS = transcript.tx
                values = (transcript.refseq, transcript.ensembl,) + transcript.tx + transcript.CDS + \
                         (e_counts, transcript.gene_GeneID, transcript.gene_ensembl,
                          transcript.protein_refseq, transcript.protein_ensembl,)
                cur.execute('''INSERT INTO Transcripts
                            (transcript_refseq_id, transcript_ensembl_id, tx_start, tx_end, cds_start,\
                             cds_end, exon_count, gene_GeneID_id, gene_ensembl_id, protein_refseq_id, protein_ensembl_id) 
                            VALUES(?,?,?,?,?,?,?,?,?,?,?)''', values)

                # insert into Genes table (once per gene)
                if transcript.gene_GeneID not in geneSet and \
                        transcript.gene_ensembl not in geneSet:
                    # look the gene up by GeneID when available, falling back to the ensembl id
                    gene = self.data.Genes.get(
                        transcript.gene_GeneID if transcript.gene_GeneID is not None else transcript.gene_ensembl,
                        self.data.Genes.get(transcript.gene_ensembl, None))
                    if gene is None:
                        raise ValueError("No gene in Genes for transcript {}, {}. GeneID: {}, ensembl gene: {}".format(
                            transcript.refseq, transcript.ensembl, transcript.gene_GeneID, transcript.gene_ensembl))
                    values = (gene.GeneID, gene.ensembl, gene.symbol,
                              gene.synonyms, gene.chromosome, gene.strand, self.species,)
                    cur.execute(''' INSERT INTO Genes
                                (gene_GeneID_id, gene_ensembl_id, gene_symbol, synonyms, chromosome,\
                                 strand, specie)
                                VALUES (?, ?, ?, ?, ?, ?, ?)''', values)
                    geneSet.add(gene.GeneID)
                    geneSet.add(gene.ensembl)
                    geneSet.discard(None)  # a missing id must not mark "None" as an already-seen gene

                start_abs, stop_abs = transcript.exons2abs()
                starts = transcript.exon_starts.copy()
                ends = transcript.exon_ends.copy()
                for iEx in range(e_counts):
                    ex_num = iEx + 1  # exon order in the transcript is 1-based
                    # insert into Transcript_Exon table
                    values = (transcript.refseq, transcript.ensembl, ex_num, starts[iEx], ends[iEx],
                              start_abs[iEx], stop_abs[iEx],)
                    cur.execute(''' INSERT INTO Transcript_Exon
                                (transcript_refseq_id, transcript_ensembl_id, order_in_transcript,\
                                genomic_start_tx, genomic_end_tx, abs_start_CDS, abs_end_CDS)
                                VALUES (?, ?, ?, ?, ?, ?, ?)''', values)

                    # insert into Exons table
                    values = (transcript.gene_GeneID, transcript.gene_ensembl, starts[iEx], ends[iEx],)
                    if values not in uExon:
                        uExon.add(values)
                        cur.execute('''INSERT INTO Exons
                                    (gene_GeneID_id, gene_ensembl_id, genomic_start_tx, genomic_end_tx)
                                    VALUES (?, ?, ?, ?)''', values)

                # insert into Proteins table
                if ensemblkey:
                    protID = transcript.protein_ensembl
                else:
                    protID = transcript.protein_refseq
                protein = self.data.Proteins[protID]
                values = (protein.refseq, protein.ensembl, protein.description, protein.length,
                          protein.synonyms, transcript.refseq, transcript.ensembl,)
                cur.execute(''' INSERT INTO Proteins
                                (protein_refseq_id, protein_ensembl_id, description, length, synonyms, transcript_refseq_id, transcript_ensembl_id)
                                VALUES (?, ?, ?, ?, ?, ?, ?)''', values)
                splicin = set()  # SpliceInDomains rows already inserted for this transcript
                Domdf = pd.DataFrame(columns=["protein_refseq_id", "protein_ensembl_id", "type_id",
                                              "AA_start", "AA_end", "nuc_start", "nuc_end", "total_length",
                                              "ext_id", "splice_junction", "complete_exon"])
                for reg in self.data.Domains.get(protID, [None]):
                    if reg is None:
                        continue
                    regID = self.DomainOrg.addDomain(reg)
                    if regID is None:
                        continue
                    relevantDomains.add(regID)
                    relation, exon_list, length = reg.domain_exon_relationship(start_abs, stop_abs)
                    total_length = reg.nucEnd - reg.nucStart + 1  # adding one because coordinates are full-closed!
                    splice_junction = 0
                    complete = 0
                    if relation == 'splice_junction':
                        splice_junction = 1
                        for i in range(len(exon_list)):
                            values = (transcript.refseq, transcript.ensembl,
                                      exon_list[i], reg.nucStart, regID,
                                      total_length, length[i], i + 1,)
                            if values not in splicin:
                                cur.execute(''' INSERT INTO SpliceInDomains
                                    (transcript_refseq_id, transcript_ensembl_id,\
                                     exon_order_in_transcript, domain_nuc_start, type_id,\
                                     total_length, included_len, exon_num_in_domain)
                                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)''', values)
                                splicin.add(values)
                    elif relation == 'complete_exon':
                        complete = 1
                    # insert into domain event table
                    ldf = Domdf.shape[0]
                    # append the last field of the organizer's record to the external id when it is set
                    extWithInter = "; ".join([reg.extID, self.DomainOrg.allDomains[regID][-1]]) if \
                        self.DomainOrg.allDomains[regID][-1] is not None else reg.extID
                    values = (protein.refseq, protein.ensembl, regID,
                              reg.aaStart, reg.aaEnd, reg.nucStart, reg.nucEnd, total_length,
                              extWithInter, splice_junction, complete,)
                    Domdf.loc[ldf] = list(values)
                Domdf = Domdf.drop_duplicates()
                # groupby drops rows whose key is NaN, so missing ids are masked with -1 and restored afterwards
                Domdf = Domdf.fillna(-1)
                Domdf = Domdf.groupby(["protein_refseq_id", "protein_ensembl_id", "type_id",
                                       "AA_start", "AA_end", "nuc_start", "nuc_end", "total_length",
                                       "splice_junction", "complete_exon"],
                                      as_index=False, sort=False).agg(
                    lambda col: "; ".join(set(col)))  # groupby all besides ext_ID
                Domdf = Domdf.replace(-1, np.nan)
                Domdf.to_sql("DomainEvent", con, if_exists="append", index=False)
            # ~~~ end of loop iterating over transcripts ~~~
            bp = time.time()

            if merged:
                relevantDomains = preDomains.union(relevantDomains)
                print('Recreating the table: DomainType and update domains')
                cur.executescript("DROP TABLE IF EXISTS DomainType;")
                print('Creating the table: DomainType')
                cur.execute('''
                            CREATE TABLE DomainType(
                                    type_id INTEGER NOT NULL PRIMARY KEY UNIQUE,
                                    name TEXT,
                                    other_name TEXT,
                                    description TEXT,
                                    CDD_id TEXT,
                                    cdd TEXT,
                                    pfam TEXT,
                                    smart TEXT,
                                    tigr TEXT,
                                    interpro TEXT
                                    );'''
                            )
            # insert into domain type table
            postDomains = set(self.DomainOrg.allDomains.keys())
            print("from all {} domains in organizer, {} not in relevant domains".format(len(postDomains),
                                                                                        len(postDomains.difference(relevantDomains))))
            for typeID in relevantDomains:
                if typeID in self.DomainOrg.allDomains:
                    values = (typeID,) + self.DomainOrg.allDomains[typeID]
                    cur.execute(''' INSERT INTO DomainType
                                    (type_id, name, other_name, description, CDD_id, cdd,\
                                    pfam, smart, tigr, interpro)
                                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', values)
            print("#### Filling in domain type table: %s seconds" % (time.time() - bp))

            con.commit()
        # ~~~ database connection is closed here ~~~

    def AddOrthology(self, orthologsDict):
        """
        This function adds the orthology data to the database, only for the genes included in the database.
        Changes the database with no returned output: the Orthology table of '<self.dbName>.sqlite' is
        replaced. If writing the table fails, the error is printed and the data is dumped to
        'OrthologyTable.Failed.csv' instead.
        @param orthologsDict: created by OrthologsBuilder module, called by the main script. Maps a couple of
            species names to a DataFrame. The code relies on the columns '<species>_ID' and '<species>_name'
            being present for both species of the couple.
        @return: None
        """
        from contextlib import closing  # local import keeps this block self-contained

        # Column names as declared for the Orthology table.
        columns = ['A_ensembl_id', 'A_GeneSymb', 'A_species',
                   'B_ensembl_id', 'B_GeneSymb', 'B_species']

        def join_unique(col):
            # Collapse a group to its distinct values; NaN is skipped and the order is deterministic.
            unique = sorted(set(col.dropna().astype(str)))
            return ', '.join(unique) if unique else None

        orthology_species = {spec for couple in orthologsDict for spec in couple}
        # `with con` commits / rolls back; `closing` actually closes the connection afterwards.
        with closing(connect(self.dbName + '.sqlite')) as con, con:
            db_data = dict()
            for spec in orthology_species:
                # parameterized query: the species name is never spliced into the SQL text
                genes = pd.read_sql(
                    "SELECT gene_ensembl_id,gene_symbol,specie FROM Genes WHERE specie=?",
                    con, params=(spec,))
                genes['gene_symbol'] = genes['gene_symbol'].str.upper()
                # rename the id column so it becomes the merge key shared with the orthology frame
                db_data[spec] = genes.rename(columns={'gene_ensembl_id': spec + "_ID"})
            print("collecting orthology data for:")
            frames = []
            for couple, ortho in orthologsDict.items():
                print("\t{} and {}".format(couple[0], couple[1]))
                merged_df = ortho
                for n, spec in enumerate(couple):
                    label = 'A' if n == 0 else 'B'
                    merged_df = pd.merge(db_data[spec], merged_df)
                    # exact-name renames: columns already labelled for species A are left untouched
                    merged_df = merged_df.rename(columns={"specie": label + "_species",
                                                          "gene_symbol": label + "_GeneSymb",
                                                          spec + "_ID": label + "_ensembl_id"})
                    merged_df = merged_df.drop(spec + "_name", axis=1)
                frames.append(merged_df)
            if frames:
                # DataFrame.append was removed in pandas 2.0; concat works on every version
                MainOrtho = pd.concat(frames, sort=False)
            else:
                MainOrtho = pd.DataFrame(columns=columns)
            MainOrtho = MainOrtho.drop_duplicates()
            if not MainOrtho.empty:
                MainOrtho = MainOrtho.groupby(["A_ensembl_id", "B_ensembl_id"], as_index=False, sort=False).agg(
                    join_unique)
            # declared columns first; any extra column carried by the orthology frames is kept after them
            MainOrtho = MainOrtho[columns + [c for c in MainOrtho.columns if c not in columns]]
            print("Filling in Orthology table...")
            try:
                # NOTE(review): if_exists="replace" recreates the table with pandas-inferred column types,
                # so the keys declared in the original CREATE TABLE are not retained - confirm this is intended.
                MainOrtho.to_sql("Orthology", con, if_exists="replace", index=False)
            except Exception as err:
                # best effort: keep the computed table on disk so the run is not lost
                print(err)
                MainOrtho.to_csv("OrthologyTable.Failed.csv")
            else:
                print("Filling Orthology table complete!")
Beispiel #21
0
    #  Configure logging
    # Application logger named after this module; records from INFO upwards are kept.
    app_logger = logging.getLogger(__name__)
    app_logger.setLevel(logging.INFO)

    # The 'elasticsearch.trace' logger is limited to WARNING and above.
    es_logger = logging.getLogger('elasticsearch.trace')
    es_logger.setLevel(logging.WARNING)

    # One log file per day ('burner_YYYYMMDD.log'), opened in append mode and
    # shared by both loggers.
    handler = logging.FileHandler(filename='burner_%s.log' %
                                  datetime.datetime.now().strftime('%Y%m%d'),
                                  mode='a')
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(message)s",
                          "%Y-%m-%d %H:%M"))
    app_logger.addHandler(handler)
    es_logger.addHandler(handler)

    app_logger.info('Started')

    #  Read configuration file
    # 'config.cfg' is a relative path, so it is resolved against the current working directory.
    config = ConfigParser.RawConfigParser()
    config.read('config.cfg')

    # The collector and the updater both receive the parsed config and the application logger.
    collector = Collector.Collector(config, app_logger)

    # Hand whatever the collector returns for users' expenses to the updater for storing.
    updater = Updater(config, app_logger)
    updater.store_users_expenses(collector.get_users_expenses())

    app_logger.info('Completed\n')
Beispiel #22
0
class Simulation(object):
    """
    Tk grid simulation: a collector visits randomly placed bins on a map made of
    dumps, roads, bins and grass, and runs two image recognisers at every stop.
    """

    def checkIfPositionIsEmpty(self, position):
        """Return True when no map element is located at `position`, False otherwise."""
        return not any(element.position == position for element in self.mapElements)

    def __init__(self, binsAmount):
        """
        Build the window, the canvas and the map.

        binsAmount -- number of bins placed at random free positions.
        """
        self.gridWidth = 20
        self.gridHeight = 9
        self.fieldSize = 64  # edge of one grid cell, in pixels
        self.window = Tk()
        self.canvas = Canvas(self.window,
                             width=self.fieldSize * self.gridWidth,
                             height=self.fieldSize * self.gridHeight)
        self.binsAmount = binsAmount
        self.window.title("Simulation")
        self.collector = Collector(1, 1, 1)
        self.positionsToVisit = []
        self.mapElements = []
        # Order matters: each step only fills positions the previous steps left free.
        self.addDumps()
        self.addRoads()
        self.addBins()
        self.addGrass()
        self.MovementLogic = MovementLogic(self.mapElements, self.gridWidth,
                                           self.gridHeight)
        self.classifier = GarbageClassifier("learningExamples.txt")

    def addDumps(self):
        """Place one dump per garbage type on row 0, starting at column 0."""
        for column, kind in enumerate(['plastic', 'paper', 'glass', 'other']):
            self.mapElements.append(Dump(column, 0, kind))

    def addRoad(self, position1, position2):
        """
        Add road tiles on the straight segment from position1 to position2 (inclusive),
        skipping positions that are already occupied. Segments that are neither
        vertical nor horizontal are ignored.
        """
        if position1[0] == position2[0]:
            # vertical segment: x is fixed
            for y in range(position1[1], position2[1] + 1):
                if self.checkIfPositionIsEmpty([position1[0], y]):
                    self.mapElements.append(Road(position1[0], y))
        elif position1[1] == position2[1]:
            # horizontal segment: y is fixed
            for x in range(position1[0], position2[0] + 1):
                if self.checkIfPositionIsEmpty([x, position1[1]]):
                    self.mapElements.append(Road(x, position1[1]))

    def addRoads(self):
        """Add three horizontal roads (rows 1, 4, 7) and 1 to 6 random vertical roads."""
        # NOTE(review): the end point x == gridWidth is inclusive in addRoad, so a tile
        # is also created one column past the last grid column - confirm this is intended.
        self.addRoad([0, 1], [self.gridWidth, 1])
        self.addRoad([0, 4], [self.gridWidth, 4])
        self.addRoad([0, 7], [self.gridWidth, 7])
        r = randint(1, 6)
        for i in range(0, r):
            s = randint(1, self.gridWidth - 2)
            self.addRoad([s, 1], [s, self.gridHeight - 2])

    def addBins(self):
        """Place `binsAmount` bins on random free positions and queue them for a visit."""
        for i in range(0, self.binsAmount):
            # draw positions until a free one is found
            rightPosition = False
            while not rightPosition:
                x = randint(0, self.gridWidth - 1)
                y = randint(0, self.gridHeight - 1)
                if self.checkIfPositionIsEmpty([x, y]):
                    rightPosition = True
            element = Bin(x, y)
            self.positionsToVisit.append([x, y])
            self.mapElements.append(element)

    def addGrass(self):
        """Cover every grid position that is still free with grass."""
        for i in range(0, self.gridWidth):
            for j in range(0, self.gridHeight):
                if self.checkIfPositionIsEmpty([i, j]):
                    self.mapElements.append(Grass(i, j))

    def display(self):
        """Redraw all map elements and then the collector on top of them."""
        # Remove the items of the previous frame first; otherwise every refresh
        # stacks a full new set of canvas items on top of the old ones.
        self.canvas.delete("all")
        for element in self.mapElements:
            x = element.position[0]
            y = element.position[1]
            self.canvas.create_image(x * self.fieldSize,
                                     y * self.fieldSize,
                                     image=element.image,
                                     anchor=NW)
        x = self.collector.state.position[0]
        y = self.collector.state.position[1]
        self.canvas.create_image(x * self.fieldSize,
                                 y * self.fieldSize,
                                 image=self.collector.image,
                                 anchor=NW)
        self.canvas.pack()

    def _refresh(self):
        """Pack the canvas and let Tk process pending redraws and events."""
        self.canvas.pack()
        self.window.update_idletasks()
        self.window.update()

    def update(self):
        """Draw the current frame and pause for half a second."""
        self.display()
        self.window.update_idletasks()
        self.window.update()
        time.sleep(0.5)

    def classify(self):
        """Show five random test photos, each followed by the classifier's result for it."""
        for i in range(0, 5):
            r = randint(1, 40)
            name = "./photos/test/test" + str(r) + ".jpg"
            im = ImageExample(name)
            # keep a reference in `image` while the picture is on screen
            image = ImageTk.PhotoImage(Image.open(name))
            result = self.classifier.test(im.getString())
            self.canvas.create_image(350, 100, image=image, anchor=NW)
            self._refresh()
            time.sleep(0.5)
            self.canvas.create_text(420,
                                    150,
                                    fill="black",
                                    font="Times 20",
                                    text=result)
            self._refresh()
            time.sleep(2)

    def predictDigits(self):
        """Restore the saved TensorFlow model and print its prediction for one random test digit."""
        # A private graph and a context-managed session: the session is closed on exit and
        # repeated calls no longer keep importing the meta graph into the global default graph.
        with tf.Graph().as_default() as graph, tf.Session(graph=graph) as sess:
            saver = tf.train.import_meta_graph('./src/model/my-model.meta')
            saver.restore(sess, tf.train.latest_checkpoint('./model'))
            print("Model został wczytany.")

            output_layer = graph.get_tensor_by_name("output:0")
            X = graph.get_tensor_by_name("X:0")

            r = randint(0, 9)
            img = np.invert(
                Image.open("../test_digits/house_test_" + str(r) + ".png"))

            prediction = sess.run(tf.argmax(output_layer, 1), feed_dict={X: [img]})
            print("Rozpoznanie dla testowanego obrazka:", np.squeeze(prediction))

    def _markSearching(self, position, searching):
        """Set the `searching` flag of every element at `position` and refresh its image."""
        for element in self.mapElements:
            if element.position == position:
                element.searching = searching
                element.updateImage()

    def start(self):
        """Visit every queued bin position: highlight it, move there, run both recognisers."""
        for p in self.positionsToVisit:
            self._markSearching(p, True)
            self.update()

            actions = self.MovementLogic.getActions(self.collector.state, p)
            if actions is not None:
                for action in actions:
                    print(action)
                    self.update()
                    self.collector.doAction(action)
            self.update()
            self.predictDigits()
            self.classify()
            self._markSearching(p, False)
Beispiel #23
0
def data():
    """
    Run a Collector on a boto3 session for region 'us-west-2'.

    Returns a 3-tuple: the collector's result, the status code 200 and a
    JSON content-type header dict.
    """
    aws_session = boto3.Session(region_name='us-west-2')
    payload = Collector(aws_session).collect()
    return payload, 200, {'Content-Type': 'application/json'}
Beispiel #24
0
#thread management
from queue import Queue
from collections import deque
import time

from Collector import Collector
from Detector import Detector
from Processor import Processor

# parameters
maxqueuelen = 200  # upper bound on the number of items each queue may hold

if __name__ == "__main__":
    # Two bounded queues link the three stages:
    #   collector -> queue_raw -> detector -> queue_detectors -> processor
    queue_raw = Queue(maxsize=maxqueuelen)
    queue_detectors = Queue(maxsize=maxqueuelen)

    collector = Collector(queue_raw)
    detector = Detector(queue_raw, queue_detectors)
    processor = Processor(queue_detectors)

    # start the stages in pipeline order
    for stage in (collector, detector, processor):
        stage.start()

    # report the fill level of both queues once a minute, forever
    while True:
        raw_len = queue_raw.qsize()
        detectors_len = queue_detectors.qsize()
        print("queue_raw len: {} queue_detectors len: {}".format(raw_len, detectors_len))
        time.sleep(60)
Beispiel #25
0
def run(samples,channel, use, train,short, preprocess_chain = []):

    if use == "xgb":
        from XGBModel import XGBObject as modelObject
        parameters = "conf/parameters_xgb.json"

    if use == "keras":
        from KerasModel import KerasObject as modelObject
        parameters = "conf/parameters_keras.json"


    read = Reader(channel = channel,
                  config_file = samples,
                  folds=2)

    target_names = read.config["target_names"]
    variables = read.config["variables"]
    if not os.path.exists("models"):
        os.mkdir("models")

    modelname = "models/{0}.{1}".format(channel,use)
    scaler = None

    if train:
        print "Training new model"
        print "Loading Training set"
        trainSet = read.getSamplesForTraining()

        print "Fit Scaler to training set...",
        scaler = trainScaler(trainSet, variables )

        print " done. Dumping for later."
        with open("models/StandardScaler.{0}.pkl".format(channel), 'wb') as FSO:
            cPickle.dump(scaler, FSO , 2)
        trainSet = applyScaler(scaler, trainSet, variables)

        model = modelObject( parameter_file = parameters,
                             variables=variables,
                             target_names = target_names )
        model.train( trainSet )
        model.save(modelname)

    else:
        
        if os.path.exists("models/StandardScaler.{0}.pkl".format(channel) ):
            print "Loading Scaler"
            with open( "models/StandardScaler.{0}.pkl".format(channel), "rb" ) as FSO:
                scaler = cPickle.load( FSO )

        print "Loading model and predicting."
        model = modelObject( filename = modelname )

    where = ""
    coll = Collector( channel = channel,
                      var_name = "pred_prob",
                      target_names = target_names, 
                      path = use, 
                      recreate = True,
                      rebin = False )

    print "Predicting simulation"
    for sample, sampleName in read.get(what = "nominal"):
        pred =  model.predict( applyScaler(scaler, sample, variables), where )
        coll.addPrediction(pred, sample, sampleName)
        
    print "Adding looser samples to predictions"
    for sample, sampleName in read.get(what = "more"):
        pred =  model.predict( applyScaler(scaler, sample, variables), where )
        coll.addPrediction(pred, sample, sampleName)
        
    print "Predicting data"
    for sample, sampleName in read.get(what = "data"):
        pred =  model.predict( applyScaler(scaler, sample, variables), where )
        coll.addPrediction(pred, sample, sampleName)
    
    if not short:
        print "Predicting TES shapes"
        for sample, sampleName in read.get(what = "tes"):
            pred =  model.predict( applyScaler(scaler, sample, variables), where )
            coll.addPrediction(pred, sample, sampleName)

        print "Predicting JES shapes"
        for sample, sampleName in read.get(what = "jec"):
            pred =  model.predict( applyScaler(scaler, sample, variables), where )
            coll.addPrediction(pred, sample, sampleName)   

    coll.createDC(writeAll = True)

    plot = Plotter( channel= channel,
                    naming = read.processes,
                    path = use )

    plot.makePlots()
Beispiel #26
0
class dbBuilder:

    def __init__(self, species, download=False, withEns=True):
        """
        Collect the raw data of one species and set up an empty builder.

        :param species: species identifier; handed to ``Collector`` and later used
                        for the default database name and the ``specie`` column
        :param download: forwarded unchanged to ``Collector.collectAll``
        :param withEns: forwarded unchanged to ``Collector.collectAll``
        """
        collector = Collector(species)
        collector.collectAll(download=download, withEns=withEns)

        self.species = species
        # Database name without the '.sqlite' suffix; chosen later by
        # create_tables_db() / fill_in_db().
        self.dbName = None
        self.data = collector
        self.TranscriptNoProteinRec = {}
        self.DomainsSourceDB = 'DB_merged.sqlite'
        self.DomainOrg = DomainOrganizer()

    def create_tables_db(self, merged=True, dbName=None):
        """
        Create a transcripts table in the specie database and fills with ucsc transcripts data
        """
        if dbName is not None:
            self.dbName = dbName
        elif merged:
            self.dbName = 'DB_merged'
        else:
            self.dbName = 'DB_' + self.species

        print("Creating database: {}...".format(self.dbName))
        with connect(self.dbName + '.sqlite') as con:
            cur = con.cursor()
            cur.executescript('DROP TABLE IF EXISTS Genes;')
            print('Creating the table: Genes')
            cur.execute('''
                        CREATE TABLE Genes(
                                gene_GeneID_id TEXT,
                                gene_ensembl_id TEXT,
                                gene_symbol TEXT,
                                synonyms TEXT,
                                chromosome TEXT,
                                strand TEXT,
                                specie TEXT, 
                                PRIMARY KEY(gene_GeneID_id, gene_ensembl_id, gene_symbol)
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS transcripts;")
            print('Creating the table: Transcripts')
            cur.execute('''
                        CREATE TABLE Transcripts(
                                transcript_refseq_id TEXT,
                                transcript_ensembl_id TEXT,
                                tx_start INTEGER,
                                tx_end INTEGER,
                                cds_start INTEGER,
                                cds_end INTEGER,
                                exon_count INTEGER,
                                gene_GeneID_id TEXT,                        
                                gene_ensembl_id TEXT,
                                protein_refseq_id TEXT,
                                protein_ensembl_id TEXT,
                                PRIMARY KEY (transcript_refseq_id, transcript_ensembl_id),
                                FOREIGN KEY(gene_GeneID_id, gene_ensembl_id) REFERENCES Genes(gene_GeneID_id, gene_ensembl_id),
                                FOREIGN KEY(protein_refseq_id,protein_ensembl_id) REFERENCES Proteins(protein_refseq_id,protein_ensembl_id)
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS Exons;")
            print('Creating the table: Exons')
            cur.execute('''
                        CREATE TABLE Exons(
                                gene_GeneID_id TEXT,
                                gene_ensembl_id TEXT,        
                                genomic_start_tx INTEGER,
                                genomic_end_tx INTEGER,
                                PRIMARY KEY (gene_GeneID_id, gene_ensembl_id, genomic_start_tx, genomic_end_tx),
                                FOREIGN KEY(gene_GeneID_id, gene_ensembl_id) REFERENCES Genes(gene_GeneID_id, gene_ensembl_id)
                                );'''
                        )

            cur.executescript("DROP TABLE IF EXISTS Transcript_Exon;")
            print('Creating the table: Transcript_Exon')
            cur.execute('''
                        CREATE TABLE Transcript_Exon(
                                transcript_refseq_id TEXT,
                                transcript_ensembl_id TEXT,
                                order_in_transcript INTEGER,
                                genomic_start_tx INTEGER,
                                genomic_end_tx INTEGER,
                                abs_start_CDS INTEGER,
                                abs_end_CDS INTEGER,
                                PRIMARY KEY(transcript_refseq_id, transcript_ensembl_id, order_in_transcript),
                                FOREIGN KEY(transcript_refseq_id, transcript_ensembl_id) 
                                 REFERENCES Transcripts(transcript_refseq_id, transcript_ensembl_id),
                                FOREIGN KEY(genomic_start_tx, genomic_end_tx)\
                                 REFERENCES Exons(genomic_start_tx, genomic_end_tx)
                                );'''
                        )

            cur.executescript("DROP TABLE IF EXISTS Proteins;")
            print('Creating the table: Proteins')
            cur.execute('''
                        CREATE TABLE Proteins(
                                protein_refseq_id TEXT,
                                protein_ensembl_id TEXT,
                                description TEXT,
                                synonyms TEXT,
                                length INTEGER,
                                transcript_refseq_id TEXT,
                                transcript_ensembl_id TEXT,
                                PRIMARY KEY(protein_refseq_id, protein_ensembl_id),
                                FOREIGN KEY(transcript_refseq_id, transcript_ensembl_id) REFERENCES Transcripts(transcript_refseq_id, transcript_ensembl_id)
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS DomainType;")
            print('Creating the table: DomainType')
            cur.execute('''
                        CREATE TABLE DomainType(
                                type_id INTEGER NOT NULL PRIMARY KEY UNIQUE,
                                name TEXT,
                                other_name TEXT,
                                description TEXT,
                                CDD_id TEXT,
                                cdd TEXT,
                                pfam TEXT,
                                smart TEXT,
                                tigr TEXT,
                                interpro TEXT
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS DomainEvent;")
            print('Creating the table: DomainEvent')
            cur.execute('''
                        CREATE TABLE DomainEvent(
                                protein_refseq_id TEXT,
                                protein_ensembl_id TEXT,
                                type_id INTEGER,
                                AA_start INTEGER,
                                AA_end INTEGER,
                                nuc_start INTEGER,
                                nuc_end INTEGER,
                                total_length INTEGER,
                                ext_id TEXT,
                                splice_junction BOOLEAN,
                                complete_exon BOOLEAN,
                                PRIMARY KEY(protein_refseq_id, protein_ensembl_id, type_id, AA_start, total_length, ext_id),
                                FOREIGN KEY(type_id) REFERENCES DomainType(type_id),
                                FOREIGN KEY(protein_refseq_id, protein_ensembl_id) 
                                 REFERENCES Proteins(protein_refseq_id, protein_ensembl_id)
                                );'''
                        )
            cur.executescript("DROP TABLE IF EXISTS SpliceInDomains;")
            print('Creating the table: SpliceInDomains')
            cur.execute("""
                        CREATE TABLE SpliceInDomains(
                                transcript_refseq_id TEXT,
                                transcript_ensembl_id TEXT,
                                exon_order_in_transcript INTEGER,
                                type_id INTEGER,
                                total_length INTEGER,
                                domain_nuc_start INTEGER,
                                included_len INTEGER,
                                exon_num_in_domain INTEGER,
                                PRIMARY KEY (transcript_refseq_id, transcript_ensembl_id, exon_order_in_transcript, type_id,\
                                total_length, domain_nuc_start),
                                FOREIGN KEY(transcript_refseq_id, transcript_ensembl_id) 
                                 REFERENCES Transcripts(transcript_refseq_id, transcript_ensembl_id),
                                FOREIGN KEY(exon_order_in_transcript) REFERENCES Transcript_Exon(order_in_transcript),
                                FOREIGN KEY(type_id) REFERENCES DomainType(type_id),
                                FOREIGN KEY(domain_nuc_start, total_length) REFERENCES DomainEvent(Nuc_start, total_length)
                                );"""
                        )
            if merged:
                cur.executescript("DROP TABLE IF EXISTS Orthology;")
                print('Creating the table: Orthology')
                cur.execute("""
                            CREATE TABLE Orthology(
                                    A_ensembl_id TEXT,
                                    A_GeneSymb TEXT,
                                    A_Species TEXT,
                                    B_ensembl_id TEXT,
                                    B_GeneSymb TEXT,
                                    B_Species TEXT,
                                    PRIMARY KEY (A_ensembl_id, B_ensembl_id),
                                    FOREIGN KEY (A_ensembl_id, B_ensembl_id, A_GeneSymb, B_GeneSymb, A_Species, B_Species) 
                                    REFERENCES Genes(gene_ensembl_id, gene_ensembl_id, gene_symbol, gene_symbol, specie, specie)
                                    );"""
                            )

    def create_index(self):
        with connect(self.dbName + '.sqlite') as con:
            cur = con.cursor()
            cur.execute('''CREATE INDEX geneTableIndexBySpecies ON Genes(specie);''')
            cur.execute('''CREATE INDEX transcriptTableIndexByGene ON Transcripts(gene_GeneID_id) ;''')
            cur.execute(
                '''CREATE INDEX exonsInTranscriptsTableIndexByTranscripts ON Transcript_Exon(transcript_refseq_id) ;''')
            cur.execute('''CREATE INDEX domainEventsTableIndexByProtein ON DomainEvent(protein_refseq_id) ;''')
            cur.execute('''CREATE INDEX domainEventsTableIndexByEnsembl ON DomainEvent(protein_ensembl_id);''')
            cur.execute(
                '''CREATE INDEX exonInTranscriptsTableIndexByEnsembl ON Transcript_Exon(transcript_ensembl_id);''')

    def fill_in_db(self, CollectDomainsFromMerged=True, merged=True, dbName=None):
        """
        This function in for unique species. for more than ine use add Species To Merged
        """
        if dbName is not None:
            self.dbName = dbName
        elif merged:
            self.dbName = 'DB_merged'
        else:
            self.dbName = 'DB_' + self.species
        if CollectDomainsFromMerged:
            self.DomainOrg.collectDatafromDB(self.DomainsSourceDB)

        Transcripts = pd.DataFrame(columns=["transcript_refseq_id", "transcript_ensembl_id",
                                            "tx_start", "tx_end", "cds_start", "cds_end", "exon_count",
                                            "gene_GeneID_id", "gene_ensembl_id",
                                            "protein_refseq_id", "protein_ensembl_id"])
        Genes = pd.DataFrame(columns=["gene_GeneID_id", "gene_ensembl_id", "gene_symbol", "synonyms",
                                      "chromosome", "strand", "specie"])
        Transcript_Exon = pd.DataFrame(columns=["transcript_refseq_id", "transcript_ensembl_id",
                                                               "order_in_transcript", "genomic_start_tx",
                                                               "genomic_end_tx", "abs_start_CDS", "abs_end_CDS"])
        Exons = pd.DataFrame(columns=["gene_GeneID_id", "gene_ensembl_id", "genomic_start_tx", "genomic_end_tx"])
        Proteins = pd.DataFrame(columns=["protein_refseq_id", "protein_ensembl_id", "description", "length",
                                         "synonyms", "transcript_refseq_id", "transcript_ensembl_id"])
        SpliceInDomains = pd.DataFrame(columns=["transcript_refseq_id", "transcript_ensembl_id",
                                                "exon_order_in_transcript", "domain_nuc_start", "type_id",
                                                "total_length", "included_len", "exon_num_in_domain"])
        DomainEvent = pd.DataFrame(columns=["protein_refseq_id", "protein_ensembl_id", "type_id", "AA_start", "AA_end",
                                            "nuc_start", "nuc_end", "total_length", "ext_id",
                                            "splice_junction", "complete_exon"])
        DomainType = pd.DataFrame(columns=["type_id", "name", "other_name", "description", "CDD_id", "cdd",
                                           "pfam", "smart", "tigr", "interpro"])
        print("Filling in the tables...")
        geneSet = set()
        uExon = set()
        domeve = set()
        relevantDomains = set()

        for tID, transcript in self.data.Transcripts.items():
            ensemblkey = False
            if tID.startswith("ENS"):
                ensemblkey = True
            e_counts = len(transcript.exon_starts)
            # insert into Transcripts table
            if transcript.CDS is None:
                transcript.CDS = transcript.tx
            values = (transcript.refseq, transcript.ensembl,) + transcript.tx + transcript.CDS + \
                     (e_counts, transcript.gene_GeneID, transcript.gene_ensembl,
                      transcript.protein_refseq, transcript.protein_ensembl,)
            idx = len(Transcripts)
            Transcripts.loc[idx] = list(values)

            # insert into Genes table
            if transcript.gene_GeneID not in geneSet and transcript.gene_ensembl not in geneSet:
                if ensemblkey:
                    gene = self.data.Genes[transcript.gene_ensembl]
                else:
                    gene = self.data.Genes[transcript.gene_GeneID]
                values = (gene.GeneID, gene.ensembl, gene.symbol,
                          gene.synonyms, gene.chromosome, gene.strand, self.species,)
                idx = len(Genes)
                Genes.loc[idx] = list(values)
                geneSet.add(gene.GeneID)
                geneSet.add(gene.ensembl)
                geneSet = geneSet - {None}

            start_abs, stop_abs = transcript.exons2abs()
            ex_num = 0
            starts = transcript.exon_starts.copy()
            ends = transcript.exon_ends.copy()
            for iEx in range(e_counts):
                ex_num += 1
                # insert into Transcript_Exon table
                values = (transcript.refseq, transcript.ensembl, ex_num, starts[iEx], ends[iEx],
                          start_abs[iEx], stop_abs[iEx],)
                idx = len(Transcript_Exon)
                Transcript_Exon.loc[idx] = list(values)

                # insert into Exons table
                values = (transcript.gene_GeneID, transcript.gene_ensembl, starts[iEx], ends[iEx],)
                if values not in uExon:
                    uExon.add(values)
                    idx = len(Exons)
                    Exons.loc[idx] = list(values)

            # insert into Proteins table
            if ensemblkey:
                protID = transcript.protein_ensembl
            else:
                protID = transcript.protein_refseq
            protein = self.data.Proteins[protID]
            values = (protein.refseq, protein.ensembl, protein.description, protein.length,
                      protein.synonyms, transcript.refseq, transcript.ensembl,)
            idx = len(Proteins)
            Proteins.loc[idx] = list(values)

            splicin = set()
            for reg in self.data.Domains.get(protID, [None]):
                if reg is None:
                    continue
                regID = self.DomainOrg.addDomain(reg)
                if regID is None:
                    continue
                relevantDomains.add(regID)
                relation, exon_list, length = reg.domain_exon_relationship(start_abs, stop_abs)
                total_length = reg.nucEnd - reg.nucStart + 1  # adding one because coordinates are full-closed!
                splice_junction = 0
                complete = 0
                if relation == 'splice_junction':
                    splice_junction = 1
                    for i in range(len(exon_list)):
                        values = (transcript.refseq, transcript.ensembl,
                                  exon_list[i], reg.nucStart, regID,
                                  total_length, length[i], i + 1,)
                        if values not in splicin:
                            idx = len(SpliceInDomains)
                            SpliceInDomains.loc[idx] = list(values)
                            splicin.add(values)
                elif relation == 'complete_exon':
                    complete = 1
                # insert into domain event table
                values = (protein.refseq, protein.ensembl, regID,
                          reg.aaStart, reg.aaEnd, reg.nucStart, reg.nucEnd, total_length,
                          reg.extID, splice_junction, complete,)
                if values not in domeve:
                    idx = len(DomainEvent)
                    DomainEvent.loc[idx] = list(values)
                    domeve.add(values)
        bp = time.time()
        if merged:
            relevantDomains = set(self.DomainOrg.allDomains.keys())
        # insert into domain type table
        for typeID in relevantDomains:
            if typeID in self.DomainOrg.allDomains.keys():
                values = (typeID,) + self.DomainOrg.allDomains[typeID]
                idx = len(DomainType)
                DomainType.loc[idx] = list(values)
        print("#### Filling in domain type table: %s seconds" % (time.time() - bp))

        with connect(self.dbName + '.sqlite') as con:
            Transcripts.to_sql("Transcripts", con, if_exists="append", index=False)
            Genes.to_sql("Genes", con, if_exists="append", index=False)
            Proteins.to_sql("Proteins", con, if_exists="append", index=False)
            Transcript_Exon.to_sql("Transcript_Exon", con, if_exists="append", index=False)
            Exons.to_sql("Exons", con, if_exists="append", index=False)
            SpliceInDomains.to_sql("SpliceInDomains", con, if_exists="append", index=False)
            DomainEvent.to_sql("DomainEvent", con, if_exists="append", index=False)
            if merged:
                cur.executescript("DROP TABLE IF EXISTS Orthology;")
                print('Creating the table: Orthology')
                cur.execute("""
                            CREATE TABLE Orthology(
                                    A_ensembl_id TEXT,
                                    A_GeneSymb TEXT,
                                    A_Species TEXT,
                                    B_ensembl_id TEXT,
                                    B_GeneSymb TEXT,
                                    B_Species TEXT,
                                    PRIMARY KEY (A_ensembl_id, B_ensembl_id),
                                    FOREIGN KEY (A_ensembl_id, B_ensembl_id, A_GeneSymb, B_GeneSymb, A_Species, B_Species) 
                                    REFERENCES Genes(gene_ensembl_id, gene_ensembl_id, gene_symbol, gene_symbol, specie, specie)
                                    );"""
                            )
            DomainType.to_sql("DomainType", con, if_exists="append", index=False)

    def AddOrthology(self, orthologsDict):
        MainOrtho = pd.DataFrame(columns=['A_ensembl_id', 'A_GeneSymb', 'A_Species',
                                          'B_ensembl_id', 'B_GeneSymb', 'B_Species'])
        db_data = dict()
        species = [spec for x in orthologsDict.keys() for spec in x]
        with connect(self.dbName + '.sqlite') as con:
            for spec in species:
                db_data[spec] = pd.read_sql(
                    "SELECT gene_ensembl_id,gene_symbol,specie FROM Genes WHERE specie='{}'".format(spec),
                    con)
        print("collecting orthology data for:")
        for couple, ortho in orthologsDict.items():
            print("\t{} and {}".format(couple[0], couple[1]))
            merged_df = None
            n = 0
            for spec in couple:
                db_data[spec]['gene_symbol'] = db_data[spec]['gene_symbol'].str.upper()
                db_data[spec].columns = db_data[spec].columns.str.replace('gene_ensembl_id', spec + "_ID")
                if n == 0:
                    merged_df = pd.merge(db_data[spec], ortho)
                else:
                    merged_df = pd.merge(db_data[spec], merged_df)
                label = 'A' if n == 0 else 'B'
                merged_df.columns = merged_df.columns.str.replace("specie", label + "_Species")
                merged_df.columns = merged_df.columns.str.replace("gene_symbol", label + "_GeneSymb")
                merged_df.columns = merged_df.columns.str.replace(spec + "_ID", label + "_ensembl_id")
                merged_df = merged_df.drop(spec + "_name", axis=1)
                n += 1
            MainOrtho = MainOrtho.append(merged_df, sort=False)
        print("Filling in Orthology table...")
        with connect(self.dbName + '.sqlite') as con:
            MainOrtho.to_sql("Orthology", con, if_exists="append", index=False)
        print("Filling Orthology table complete!")
Beispiel #27
0
from tkinter import *
import os

from tkinter.filedialog import *
from tkinter import messagebox
from threading import Thread

from sys import path as systemPath

if __name__ == "__main__":

    nope = False
    systemPath.insert(1, "src/Scripts/")
    from Collector import Collector
    collector = Collector(systemPath)
    from Loader import Loader
    __loader = Loader()

    tk = Tk()
    tk.withdraw()
    tk.overrideredirect(True)
    tk.resizable(False, False)
    tk.geometry("%dx%d+%d+%d" % (1, 1, 1, 1))

    __loader.tk = tk
    __loader.collector = collector

    from Monitor import Monitor
    __screenSize = Monitor().get_screensize()
    __loader.screenSize = __screenSize