Beispiel #1
0
Datei: db.py Projekt: F3DS/f3ds
def setupDB(url):
    """
    Create the database engine for C{url} and bind a session and the model
    metadata to it.

    @param url: database url handed to C{create_engine}

    @return: the configured session and the engine, in that order
    @rtype: C{tuple}
    """
    Logger("db").log("Setting up database")
    db_engine = create_engine(url, encoding='utf-8')

    # Disabled connect hook, kept for reference:
    #if url.startswith("sqlite"):
    #    event.listen(engine, 'connect', onconnect)

    bound_session = Session()
    bound_session.configure(bind=db_engine)

    # Bind the declarative metadata to the engine, then run create_all() on it.
    metadata = model.Base.metadata
    metadata.bind = db_engine
    metadata.create_all()

    return bound_session, db_engine
Beispiel #2
0
class BaseRPCCommands(xmlrpc.XMLRPC):
    """
    RPC functions that are offered to other peers.

    @ivar session: database session used for peer lookups
    @ivar config: configuration object handed to the constructor
    @ivar logger: "BaseRPCFunctions" logger
    """
    allowNone = True
    useDateTime = True

    def __init__(self, config, session):
        """
        Store the configuration and database session and create the logger.

        @param config: configuration object
        @param session: database session
        """
        # NOTE(review): the xmlrpc.XMLRPC initializer is not invoked here;
        # confirm this is intentional before adding the call, since it may
        # overwrite the allowNone/useDateTime class attributes above.
        self.session = session
        self.config = config
        self.logger = Logger("BaseRPCFunctions")

    def _getpeer(self, peername, action):
        """
        Attempt to retrieve a peer by name.

        @param peername: name of the peer to look up
        @param action: description of what the peer did; only used in the
                       log message written when the lookup fails

        @return: the result of C{Peer.getByName}, or C{None} when the lookup
                 raised C{MultipleResultsFound} or C{NoResultFound} (the
                 failure is logged, not raised)
        """
        try:
            return Peer.getByName(self.session, peername)
        except MultipleResultsFound as e:
            self.logger.log("A Peer %r %s but we have multiple peers by that name: %r"
                                   % (peername, action, e))
        except NoResultFound as e:
            self.logger.log("A Peer %r %s but we have no peers by that name: %r"
                                  % (peername, action, e))
        # Callers test the result for truthiness, so make the failure value explicit.
        return None
Beispiel #3
0
class ScannableRequest(object):
    """
    State object representing the state of a request. Includes most code required to do scanning of a url.

    @type url: C{str}
    @ivar url: url that was requested

    @type filesize: C{int}
    @ivar filesize: size of the file at the requested url

    @type hash: C{str} (empty string for not available)
    @ivar hash: the hash of the file, if the file has been retrieved and hashed. use L{getHash} to
                ensure this happens. L{dolocalscan} performs a download and hash, if one has not
                already been done.

    @type fileid: C{str}
    @ivar fileid: the UUID of the file downloaded.
                  C{config.scanning.download_location.format(id=fileid)} will give the path to the
                  downloaded file, and C{config.scanning.local_server_url.format(id=fileid)} will
                  give an http url to the same file.

    @type timeout: C{float}
    @ivar timeout: timeout to schedule when sleep() is called
    """
    def __init__(self, config, session, url=None, parentrequest=None,
                 digestmanager=None, scanlogmanager=None):
        """
        @param config: configuration object; C{config.scanning} is read for the
                       handler name and the timeout
        @param session: database session
        @param url: url to scan; when empty, the url of C{parentrequest} is used
        @param parentrequest: request this one derives from (optional)
        @param digestmanager: stored as-is on the instance
        @param scanlogmanager: stored as-is on the instance

        @raise Exception: if neither C{url} nor C{parentrequest} is given
        """
        self.logger = Logger("Scanner")
        self.config = config
        self.session = session
        self.handler = scanhandlers.get(config.scanning.handler)

        if not url and not parentrequest:
            raise Exception("url or parentrequest must be provided!")
        elif not url:
            self.url = parentrequest.url
        else:
            self.url = url

        self.fileid = None
        self.parentrequest = parentrequest
        self.digestmanager = digestmanager
        self.scanlogmanager = scanlogmanager
        try:
            self.timeout = float(self.config.scanning.timeout)
        except Exception:
            # Missing or unparsable timeout: fall back to a short default.
            # (Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed.)
            self.timeout = 0.1
        self.headers = {}
        self.downloaded_filepath = ''
        self.downloaded_filesize = -1 # an invalid size and can safely be used for "unknown"
        self.objectage = None
        self.contenthash = ''
        self.scan = None
        self.closepeers = []

    def sleep(self):
        """
        @return: a twisted Deferred that will be called L{timeout} seconds after
        this method is called
        @rtype: C{twisted.internet.defer.Deferred}
        """
        d = defer.Deferred()
        reactor.callLater(self.timeout, d.callback, None)
        return d

    #@cached
    def retrieve(self):
        """
        Retrieve the url via http and store it.
        urllib warning: When opening HTTPS URLs, does not attempt to validate the server certificate.

        The download is delegated to the C{urlretrieve.py} helper script, run
        as a subprocess. On success C{downloaded_filepath}, C{headers},
        C{fileid} and C{retrievems} are updated.

        @raise IncompleteScanError: if the helper script exits with a non-zero
                                    return code
        """
        self.logger.log('ScannableRequest.retrieve called')
        with TimeMeasurer() as retrieve_timer:
            fileid = str(uuid.uuid4())
            filepath = self.config.scanning.download_location.format(id=fileid)
            download_dir = os.path.dirname(filepath)
            if not os.path.exists(download_dir):
                os.makedirs(download_dir)
            self.fileid = fileid
            # grab file with a subprocess...
            script_path = os.path.join(frameworkdir, 'urlretrieve.py')
            proc = subprocess.Popen(["python", script_path, self.url, filepath],
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # communicate() drains both pipes while waiting; wait() alone can
            # deadlock once the child fills one of the pipe buffers.
            stdout_data, stderr_data = proc.communicate()
            if proc.returncode != 0:
                error = stderr_data.splitlines(True)
                self.logger.log("Error downloading url %s for scanning: %s " % (self.url, error))
                raise IncompleteScanError
            # Strip each line before unpickling, as before. A trailing empty
            # line after the pickle terminator is ignored by pickle.loads.
            lines = [line.strip() for line in stdout_data.split('\n')]
            # SECURITY NOTE(review): pickle.loads executes whatever the helper
            # script printed; this is only safe as long as urlretrieve.py is
            # trusted and pickles the data itself.
            filepath, headers = pickle.loads('\n'.join(lines))
        self.downloaded_filepath = filepath
        self.headers = dict(headers)
        self.retrievems = int(retrieve_timer.total * 1000.0)  # time.time() uses seconds, not ms

    @property
    def filepath(self):
        """
        Ensure we have downloaded the file when trying to use filepath.
        """
        if not self.downloaded_filepath:
            self.retrieve()
        return self.downloaded_filepath

    def retrieveHeaders(self):
        """
        Retrieve the headers from the url via HTTP HEAD, if they are not already stored.

        Also fills in C{downloaded_filesize} from the C{content-length} header
        and C{objectage} from the C{last-modified} header when those are still
        unset. Missing or unparsable header values are logged and skipped.
        """
        if not self.headers:
            oururl = urlparse.urlparse(self.url)
            if oururl.scheme == "http":
                conn = httplib.HTTPConnection(oururl.netloc)
            else:
                conn = httplib.HTTPSConnection(oururl.netloc)
            # The request target must not be empty and has to carry the query
            # string, otherwise a different resource is probed.
            target = oururl.path or "/"
            if oururl.query:
                target = "%s?%s" % (target, oururl.query)
            try:
                conn.request("HEAD", target)
                response = conn.getresponse()
                self.headers = dict(response.getheaders())
            finally:
                conn.close()
        if self.downloaded_filesize < 0:
            try:
                content_length = self.headers["content-length"]
                self.downloaded_filesize = int(content_length)
            except KeyError:
                self.logger.log("url returned no content-length: %r" % (self.url))
            except ValueError:
                self.logger.log("url returned invalid filesize: %r" % content_length)
        if not self.objectage:
            try:
                age = self.headers['last-modified']
                self.objectage = datetime.strptime(age, '%a, %d %b %Y %H:%M:%S %Z')
            except KeyError:
                self.logger.log('url returned no last-modified: %r' % (self.url))
            except ValueError:
                self.logger.log('url returned invalid last-modified: %r' % age)
        # message-length should be handled too:
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.4

    @property
    def filesize(self):
        """
        Ensure we have the filesize when we try to use it.

        Tries the HTTP headers first and falls back to the size of the
        downloaded file on disk.

        @raise IncompleteScanError: if the size of the file cannot be read
        """
        if self.downloaded_filesize < 0:
            self.retrieveHeaders()
        # If it is still invalid, see what the os can give us
        if self.downloaded_filesize < 0:
            path = self.downloaded_filepath
            try:
                path = self.filepath
                self.downloaded_filesize = os.path.getsize(path)
            except (IOError, OSError) as e:
                # OSError is what os.path.getsize raises; WindowsError is a
                # subclass of it and is not defined on other platforms.
                msg = "Error while processing %s (file stats for file at %s): %s"
                # Log the local path so the handler cannot start another download.
                self.logger.log(msg % (self.url, path, e))
                raise IncompleteScanError
        return self.downloaded_filesize
Beispiel #4
0
class SocialScanRPCCommands(BaseRPCCommands):
    """
    SocialScan RPC functions that are offered to other peers.
    """
    def __init__(self, config, session):
        """
        Initialize the base class and replace its logger with an
        "RPCFunctions" logger.
        """
        super(SocialScanRPCCommands, self).__init__(config, session)
        self.logger = Logger("RPCFunctions")

    def xmlrpc_scanlogOffer(self, peername, url):
        """
        Called by a peer to offer a scanlog. Stores the offer so that a worker
        can retrieve it later (due to the large size scanlogs often reach).
        """
        # NOTE(review): the result of xmlrpc_logOffer is discarded, so this
        # RPC always answers None - confirm that is intended.
        self.xmlrpc_logOffer(peername, url, logtype='scan')

    def xmlrpc_scanRequest(self, peername, url, key):
        """
        Called by a peer to request a scan of C{url}; the request is queued in
        the database as an "active-scan" L{QueuedRequest}.

        @param peername: name of the requesting peer
        @param url: url the peer wants scanned
        @param key: key supplied by the peer, stored with the request

        @return: C{"success"}, C{"peer not known"} or C{"exception"}
        @rtype: C{str}
        """
        try:
            peer = self._getpeer(peername, "requested a scan on url %s" % url)
            if not peer:
                return "peer not known"

            request = QueuedRequest(self.config.owner, "active-scan", peer,
                                    url, key=key)
            self.session.add(request)
            self.session.commit()

            self.logger.log("Scan request %r: %r" % (key, request))
            return "success"
        except Exception:
            # RPC boundary: report the failure to the peer instead of raising.
            self.logger.exception()
            return "exception"

    def xmlrpc_scanResult(self, peername, url, hash, key, malicious,
                          scannervv, sigversion, sigdatestr):
        """
        Called by a peer to return the result of a scan we requested from it.
        The result is stored as a "social-active" L{Scan} linked to the
        matching L{SentScanRequest}.

        @param peername: name of the peer returning the result
        @param url: url that was scanned
        @param hash: hash of the scanned file; any falsy value is stored as C{None}
        @param key: key of the scan request this result answers
        @param malicious: scan verdict, passed through to L{Scan}
        @param scannervv: passed through to L{SigInfo}
        @param sigversion: passed through to L{SigInfo}
        @param sigdatestr: signature date as a unix timestamp (converted with
                           C{int()} and read as UTC)

        @return: C{"success"}, C{"peer not known"}, C{"no such request"} or
                 C{"exception"}
        @rtype: C{str}
        """
        try:
            peer = self._getpeer(peername, "returned a scan on url %s" % url)
            if not peer:
                return "peer not known"

            request = self.session.query(SentScanRequest).\
                        filter(SentScanRequest.owner == self.config.owner).\
                        filter(SentScanRequest.key == key).\
                        filter(SentScanRequest.peer == peer).\
                        filter(SentScanRequest.url == url).first()
            if not request:
                # Must go through self.logger: there is no self.log, and the
                # resulting AttributeError used to turn this answer into
                # "exception".
                self.logger.log("Peer %r attempted to return a scan result for "
                            "url %r with key %r, but no such scan was requested"
                            % (peer, url, key))
                return "no such request"

            sigdate = datetime.datetime.utcfromtimestamp(int(sigdatestr))

            hash = hash or None  # if the hash is empty or similar, replace with None

            scan = Scan(self.config.owner, "social-active", url, malicious,
                        siginfo=SigInfo(scannervv, sigversion, sigdate),
                        hash=hash, sentrequest=request, peer=peer)
            self.session.add(scan)
            self.session.commit()
            self.logger.log("Scan result %r: %r" % (key, scan))
            return "success"
        except Exception:
            # RPC boundary: report the failure to the peer instead of raising.
            self.logger.exception()
            return "exception"
Beispiel #5
0
class Redirector(basic.LineOnlyReceiver):
    """
    Redirector protocol class. Implements the squid redirector protocol, agnostic of where
    it is used from. Currently only uses the url field from the squid redirector protocol,
    so leaving out the other fields will have no effect.

    @ivar logger: 'Redirector' logger
    @type logger: L{Logger}

    @ivar config: config instance
    @type config: L{AttributeConfig}

    @ivar endpoint: endpoint location of Core server on localhost
    @type endpoint: C{twisted.internet.endpoints.TCP4ClientEndpoint}

    @ivar begin: time the most recent line was received, or C{None}
    @ivar end: time the most recent decision was written, or C{None}
    """
    begin = None
    end = None
    delimiter = "\n"
    def __init__(self, config):
        """
        @param config: config instance; C{config.scanning._core_port} is the
                       local port of the Core server
        """
        self.logger = Logger('Redirector')
        self.config = config
        self.endpoint = TCP4ClientEndpoint(reactor, "127.0.0.1", int(config.scanning._core_port))

    def parseLine(self, line):
        """
        Parse a line from squid
        @param line: line received from squid
        @type line: C{str}
        @return: url from the line, or C{""} if the line carries no url
        @rtype: C{str}
        """
        # IDnum URLstr ip/fqdn ident method key=value key=value
        # or
        # URLstr ip/fqdn ident method key=value key=value
        # E.g., http://www.google.com 192.168.100.1/- user2 GET myip=192.168.100.1 myport=3128

        # split() without an argument ignores repeated/leading whitespace
        fields = line.split()
        if not fields:
            return ""

        # the following block deals with the ID number being optional
        try:
            int(fields[0])
        except ValueError:
            return fields[0]

        # A line holding only the ID number has no url; report that as empty
        # instead of letting an exception escape.
        if len(fields) < 2:
            return ""
        return fields[1]

    def stop(self):
        """
        Stop running; provided in case this protocol is subclassed.
        Stops the reactor.
        """
        reactor.stop()

    @property
    def totaltime(self):
        """
        Time spent on the current request: C{end - begin} once the decision
        has been made, the time elapsed since C{begin} while it is pending,
        and a zero C{timedelta} before any request was received.
        """
        if self.begin and self.end:
            return self.end - self.begin
        elif self.begin:
            return datetime.now() - self.begin
        else:
            return timedelta(0)

    def callback(self, result):
        """
        Callback provided to CoreClientFactory.
        @param result: url to redirect to, or C{""}.
        @type result: C{str}
        """
        self.end = datetime.now()
        msg = 'Decision took %s seconds; URL result: %s'
        self.logger.log(msg % (self.totaltime.total_seconds(), result))
        self.transport.write("%s\n" % result)

    def dataReceived(self, data):
        """
        A hack to make sure this protocol will work regardless of whether it is fed
        \\r\\n newlines or \\n newlines.
        """
        basic.LineOnlyReceiver.dataReceived(self, data.replace("\r", ""))

    def lineReceived(self, line):
        """
        Handle a received line.
        @type line: C{str}
        @param line: line received
        """
        self.begin = datetime.now()
        # Forget the previous request's end time so totaltime measures this
        # request while it is still pending.
        self.end = None
        self.logger.log("Got a new request: [%s]" % line.replace("\n", ""))
        if not line:
            self.logger.log("Line empty, exiting: %r" % line)
            self.stop()
            # Nothing left to parse once we are shutting down.
            return

        url = self.parseLine(line)
        if not url:
            self.logger.log("URL empty, ignoring: %r" % url)
            return

        factory = CoreClientFactory(url, self.callback)
        self.endpoint.connect(factory)
Beispiel #6
0
class ContainerManager(object):
    """
    Container manager object which stores and manages loaded containers, and runs jobs related to them.
    For example, containers may be of type ScanLogFile or ScanDigestFile

    @ivar logger: "ContainerManager" logger
    @type logger: L{Logger}

    @ivar config: socialscan configuration
    @type config: L{AttributeConfig}

    @ivar session: SQLAlchemy database session
    @type session: C{sqlalchemy.orm.session.Session}

    @ivar loadlimit: C{int} version of C{config.container_manager.loadlimit}
    @type loadlimit: C{int}

    @ivar containers: foreign containers currently loaded
    @type containers: C{list} of L{ContainerMixin}

    @ivar ourcontainer: the container that is currently being built by this container manager
    @type ourcontainer: L{ContainerMixin}

    @ivar announcequeue: local containers to announce to peers
    @type announcequeue: C{list} of L{ContainerMixin}
    """

    def __init__(self, config, session, container_mixin):
        """
        Set up the manager for one container class: make sure the share and
        storage directories exist, load up to C{loadlimit} foreign containers,
        and resume (or create) the container currently being built locally.

        @param config: socialscan configuration; reads C{owner} and the
                       C{container_manager} section
        @param session: SQLAlchemy database session
        @param container_mixin: container class managed by this instance
        """
        self.container = container_mixin
        self.cname = self.container.__name__.lower()
        self.name = "%sManager" % (self.container.__name__)
        self.logger = Logger("%s" % self.name)
        self.logger.log("initializing %s" % (self.name))
        self.config = config
        self.session = session
        self.loaded = False

        sharedir = os.path.dirname(config.container_manager.share_location)
        storedir = os.path.dirname(config.container_manager.storage_location)
        if not os.path.exists(sharedir):
            self.logger.log("creating share dir %r" % sharedir)
            os.makedirs(sharedir)
        if not os.path.exists(storedir):
            self.logger.log("creating storage dir %r" % storedir)
            os.makedirs(storedir)

        self.loadlimit = int(config.container_manager.loadlimit)
        self.announcequeue = []
        self.containers = []
        # Foreign containers: owned by us but created by someone else.
        allcontainers = (
            session.query(self.container)
            .filter(self.container.owner == config.owner)
            .filter(self.container.creator != config.owner)
            .all()
        )
        # TODO: get the below filter working
        # .filter(self.container.tainted == False)\
        # NOTE(review): the sort is ascending, so the slice below keeps the
        # containers with the LOWEST usefulness - confirm that is intended.
        sortedcontainers = sorted(allcontainers, key=lambda container: container.usefulness)
        for container in sortedcontainers[: self.loadlimit]:
            try:
                # SECURITY NOTE(review): eval() runs a string loaded from the
                # database; only safe while that column cannot be influenced
                # by peers. Consider an explicit name -> class lookup instead.
                container.container_type = eval(container.container_type_name)
                self.containers.append(container.load())
            except (ValueError, AttributeError, IOError) as error:
                msg = "Error while loading %s %s: %s"
                self.logger.log(msg % (self.cname, container, error))

        # Most recent container of our own that is not complete yet.
        self.ourcontainer = (
            session.query(self.container)
            .filter(self.container.owner == config.owner)
            .filter(self.container.creator == config.owner)
            # "== False" is deliberate: it builds the SQL expression
            .filter(self.container.complete == False)
            .order_by(self.container.date.desc())
            .first()
        )
        if self.ourcontainer:
            try:
                # SECURITY NOTE(review): same eval() concern as above.
                self.ourcontainer.container_type = eval(self.ourcontainer.container_type_name)
                self.ourcontainer.load()
            except Exception:
                # Any load failure falls back to starting a fresh container.
                # (Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not swallowed.)
                self.logger.exception()
                self.ourcontainer = None
            else:
                self.loaded = True
        if not self.ourcontainer:
            self._newcontainer(None)