def jsonAddFilter(self):
    """
    Endpoint for the AJAX request to validate and add a new filter.

    If the submitted form is valid, builds a `Filter` carrying a
    `FilterQuery` assembled from the form's filter elements, commits it,
    writes an entry to the log with the current date and time, and
    returns `True`. Otherwise `self.form_error` is returned.
    """
    if not self.validate_form(add_filter_form()):
        return self.form_error
    text = self.form_result['name']
    # label is the display name lower-cased with spaces removed
    label = "".join(text.split(' ')).lower()
    fq = FilterQuery()
    fil = Filter(label, text)
    for f in self.form_result['filter']:
        # f['value_list'] may legitimately be None; the old code's
        # if/else produced the same value in both branches
        fq.add_element(f['cls'], f['attribute'], f['function'],
                       f['value'], f['value_list'])
    fil.query = fq
    session.add(fil)
    session.commit()
    self.write_log(self.dbfile, 'Added a filter called ' + text)
    return True
def addData(self, entry):
    """
    Given a validated form called `entry`, adds the group to the database,
    then calls `self.addEntry()` to convert the data in `entry['data']`
    to `Entry` objects.

    Returns a tuple: `(group, None)` if the adding of entries was
    successful, and `(None, error_msg)` otherwise.
    """
    group = Group(entry[u'name'], entry[u'desc'], self.case,
                  get_program(entry[u'program']))
    group.csv_name = entry[u'data'].filename
    session.add(group)
    # convert the uploaded file stream to Entry rows
    done, error_msg = self.addEntry(group.program, entry['data'].stream, group)
    if done:  # addEntry signals success with a truthy first element
        return group, None  # used in jsonEditEntries
    return None, error_msg  # exception happened or no entries found
def jsonAddCase(self):
    """
    Endpoint for wizard 1 form (adding case name and dbfile name),
    reached via Ajax.

    When the form has errors, the error dictionary `self.form_error` is
    returned so it can be jsonified and the errors displayed to the user.
    When the form is valid, the new case is created, the database
    initialised and the data saved, and `True` is returned.
    """
    if not self.validate_form(wizard1_form()):
        return self.form_error
    # form is validated, so add the case information
    dbfile = self.form_result[u'dbfile']
    Case.create_database(dbfile)  # switch to the new database
    session.add(Case(self.form_result[u'name']))
    self.dbfile = dbfile  # set sqlite database location cookie
    return True
def addEntry(self, program, file, group):
    """
    Calls the generator `convert_file()` found in :doc:`converters` on each
    row of the file, and adds the result to the database.

    Returns a `(status, error)` tuple: `(True, None)` on success,
    `(None, message)` when the file yielded no usable entries, and
    `(None, exception)` when converting or adding raised — in which case
    the session is rolled back first.

    .. note::
        This had been optimised to make the adding of data as fast as
        possible, but has been slowed down again by adding search terms.
        **ToDo**: Optimise the adding of search terms.
    """
    session.flush()
    browser_ids = {}  # (name, version, source) -> Browser.id cache
    try:
        entry_ins = Entry.__table__.insert()
        url_ins = URL.__table__.insert()
        count_num = 0
        for d in convert_file(program, file):
            browser_name = d.pop('browser_name')
            browser_version = d.pop('browser_version')
            source = d.pop('source_file')
            key = (browser_name, browser_version, source)
            browser_id = browser_ids.get(key)
            if browser_id is None:
                browser = Browser.getFilterBy(name=browser_name,
                                              version=browser_version,
                                              source=source).first()
                if browser is None:
                    browser = Browser(*key)
                    session.add(browser)
                    session.flush()  # populate browser.id for the cache
                browser_id = browser_ids[key] = browser.id
            # optimised to make adding data as fast as possible - ignores the ORM
            v = d.pop('access_time')
            if v is None:
                continue  # don't add data without an access time
            d['access_date'] = datetime(v.year, v.month, v.day, 0, 0, 0, 0)
            d['access_time'] = time(v.hour, v.minute, v.second, v.microsecond)
            v = d.pop('modified_time')
            if v is not None:
                d['modified_date'] = datetime(v.year, v.month, v.day, 0, 0, 0, 0)
                d['modified_time'] = time(v.hour, v.minute, v.second,
                                          v.microsecond)
            else:
                d['modified_date'] = None
                d['modified_time'] = None
            result = session.execute(entry_ins.values(browser_id=browser_id,
                                                      group_id=group.id, **d))
            entry_id = result.last_inserted_ids()[0]
            # add URLs
            url = URL(d['url'])
            url_result = session.execute(url_ins.values(entry_id=entry_id,
                                                        **url.asDict()))
            # BUG FIX: url_id was previously read from the *entry* insert
            # result, so it always equalled entry_id; use the URL insert's
            # own result instead
            url_id = url_result.last_inserted_ids()[0]
            # add search terms
            # TODO: make this optimised like above!
            entry = Entry.get(entry_id)
            url = URL.get(url_id)
            opts = config.options('search_engines')
            if url.query is not None and 'search' in url.path:
                for opt in opts:
                    if opt in url.netloc:
                        # pull the raw query value out of the URL's query
                        # string using the engine's configured parameter name
                        query = url.query.split(
                            config.get('search_engines', opt) + '=')[-1] \
                            .split('&')[0]
                        q_string, terms = SearchTerms.getTerms(
                            urllib.unquote(query))
                        url.search = q_string
                        for term in terms:
                            t = SearchTerms.getFilterBy(term=term,
                                                        engine=opt).first()
                            if t is None:
                                t = SearchTerms(term, opt,
                                                config.get('search', opt))
                                session.add(t)
                            else:
                                t.occurrence = t.occurrence + 1
                            entry.search_terms.append(t)
            session.flush()
            count_num = count_num + 1
        if count_num == 0:
            # we have not added anything, but no exceptions were raised
            return None, "No entries found in the uploaded file"
        # BUG FIX: the success path previously fell off the end of the
        # function, returning a bare None that the caller cannot unpack
        return True, None
    except Exception as e:
        session.rollback()
        return None, e
def addDefaultFilters(self):
    """
    Adds the default filters for the timegraph such as filtering by browser
    type, group, work hours, Google searches and local files.

    Gets called when a new case is being set up in `finish_wizard()` in
    :doc:`caseController`.
    """
    def _add_filter(label, text, elements):
        # helper: build a Filter whose FilterQuery is assembled from
        # `elements` (each an add_element() argument tuple) and flush it
        f = Filter(label, text)
        fq = FilterQuery()
        for element in elements:
            fq.add_element(*element)
        f.query = fq
        session.add(f)
        session.flush()

    # Add filters for the browsers available, unless only one browser,
    # then a filter on everything is pointless
    browsers = Browser.getAll().group_by(Browser.name).all()
    if len(browsers) > 1:
        for browser in browsers:
            _add_filter(u''.join(browser.name.lower().split(' ')),
                        browser.name,
                        [(u'Browser', u'name', u'Is', browser.name, None)])

    # filters for Google searches
    _add_filter(u'googlesearch', u'Google searches',
                [(u'URL Parts', u'query', u'Is not', None, None),
                 (u'URL Parts', u'netloc', u'Is not', None, None),
                 (u'URL Parts', u'path', u'Is not', None, None),
                 (u'URL Parts', u'netloc', u'Contains', u'google', None),
                 (u'URL Parts', u'path', u'Contains', u'search', None),
                 ])

    # filters for local files accessed
    # BUG FIX: .all() returns a list and never None, so the previous
    # `is not None` check always passed; test for a non-empty result
    files = URL.getFilterBy(scheme="file").all()
    if files:
        _add_filter(u'files', u'Local Files',
                    [(u'URL Parts', u'scheme', u'Is', u'file', None)])

    # filters for different groups
    groups = Group.getAll().all()
    if len(groups) > 1:
        for group in groups:
            _add_filter(u''.join(group.name.lower().split(' ')),
                        group.name,
                        [(u'Group', u'name', u'Is', group.name, None)])

    # filters for work hours (access time outside 09:00-17:00)
    five = time(17, 0, 1)
    nine = time(8, 59, 59)
    _add_filter(u'workhours', u'Work hours',
                [(u'Entry', u'access_time', u'Less than', five, None),
                 (u'Entry', u'access_time', u'Greater than', nine, None),
                 ])

    # filters for adverts
    _add_filter(u'adverts', u'Advert URLs',
                [(u'URL Parts', u'domain', u'Is not', None, None),
                 (u'URL Parts', u'domain', u'Is in list', None,
                  'advert_domainnames.txt')])

    # filters for Facebook, MySpace, Bebo, twitter, hi5
    _add_filter(u'social', u'Social Networking URLs',
                [(u'URL Parts', u'domain', u'Is not', None, None),
                 (u'URL Parts', u'domain', u'Is in list', None,
                  'socialmedia.txt')])

    # filters for email
    # BUG FIX: u'Is Not' did not match the u'Is not' capitalisation used by
    # every other filter element here; also removed dead five/nine locals
    # that were copy-pasted from the work-hours section and never used
    _add_filter(u'email', u'Web Email',
                [(u'Entry', u'url', u'Contains', 'mail', None),
                 (u'URL Parts', u'scheme', u'Is not', 'file', None),
                 ])

    # filters for news
    _add_filter(u'news', u'News URLs',
                [(u'URL Parts', u'hostname', u'Is not', None, None),
                 (u'URL Parts', u'hostname', u'Is in list', None,
                  'news.txt')])