예제 #1
0
class RedditLinkMediaOEmbed(InnerDoc):
    """Inner document declaring the fields of an oEmbed entry.

    Referenced as the ``oembed`` object of ``RedditLinkMedia``. The two
    free-text fields (``description`` and ``title``) are ``Text`` fields that
    share the module-level ``_INDEX_*`` settings and use the ``standard``
    analyzer. Several other fields are declared with
    ``doc_values=False, index=False``.
    """

    author_name = Keyword()
    author_url = Keyword()
    cache_age = Long(doc_values=False, index=False)
    description = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    height = Short(doc_values=False, index=False)
    html = Keyword(doc_values=False, index=False)
    html5 = Keyword(doc_values=False, index=False)
    mean_alpha = Float(doc_values=False, index=False)
    provider_name = Keyword()
    provider_url = Keyword()
    thumbnail_height = Short(doc_values=False, index=False)
    thumbnail_url = Keyword(doc_values=False, index=False)
    thumbnail_size = Short(doc_values=False, index=False)
    thumbnail_width = Short(doc_values=False, index=False)
    title = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    type = Keyword()
    version = Keyword()
    url = Keyword()
    width = Short(doc_values=False, index=False)
예제 #2
0
class MentorProject(Document):
    """Elasticsearch document describing a mentor and the project they offer.

    Documents are stored in the ``mentors_index`` index (one shard, no
    replicas). Student votes are kept in the nested ``listStudentsSelected``
    list, and ``numStudentsSelected`` is a derived counter that is refreshed
    from that list every time the document is saved.
    """

    name = Keyword(required=True)
    company = Text()
    bio = Text()
    backgroundRural = Boolean(required=True)
    preferStudentUnderRep = Short(required=True)  # (0-2)
    preferToolExistingKnowledge = Boolean(required=True)
    okExtended = Boolean(required=True)
    okTimezoneDifference = Boolean(required=True)
    timezone = Integer(required=True)  # +- UTC
    id = Keyword(required=True)
    proj_description = Text(required=True)
    proj_tags = Keyword(multi=True)
    numStudentsSelected = Short()
    listStudentsSelected = Nested(StudentVote)
    track = Keyword(required=True)

    class Index:
        name = "mentors_index"
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }

    def add_vote(self, student_id, choice):
        """Append a ``StudentVote`` for ``student_id`` with the given ``choice``.

        The vote is only added to the in-memory document; call ``save()`` to
        persist it.
        """
        self.listStudentsSelected.append(
            StudentVote(student_id=student_id, choice=choice))

    def save(self, **kwargs):
        """Persist the document after refreshing the vote counter.

        ``numStudentsSelected`` is recomputed from ``listStudentsSelected`` so
        the stored count always matches the stored votes (previously it was
        unconditionally reset to 0, discarding the real count).

        Args:
            **kwargs: Forwarded unchanged to ``Document.save``.

        Returns:
            Whatever ``Document.save`` returns.
        """
        self.numStudentsSelected = len(self.listStudentsSelected)
        return super().save(**kwargs)
예제 #3
0
class RedditLinkMediaEmbed(InnerDoc):
    """Inner document declaring the fields of a media-embed entry.

    Referenced by ``RedditLink`` for both ``media_embed`` and
    ``secure_media_embed``. ``content`` is a ``Text`` field using the
    module-level ``_INDEX_*`` settings and the ``standard`` analyzer; the
    dimensions and ``media_domain_url`` are declared with
    ``doc_values=False, index=False``.
    """

    content = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    height = Short(doc_values=False, index=False)
    media_domain_url = Keyword(doc_values=False, index=False)
    scrolling = Boolean()
    width = Short(doc_values=False, index=False)
예제 #4
0
class RedditMediaMetadata(InnerDoc):
    """Inner document declaring the fields of a media-metadata entry.

    Field names (the single-letter ones and the camelCase ones, hence the
    ``N815`` lint suppressions) presumably mirror the keys of the source
    JSON -- TODO confirm against the incoming data.
    """

    dashUrl = Keyword(doc_values=False, index=False)  # noqa: N815
    e = Keyword()
    hlsUrl = Keyword(doc_values=False, index=False)  # noqa: N815
    id = Keyword(doc_values=False)
    isGif = Boolean()  # noqa: N815
    m = Keyword()
    s = Object(RedditMediaMetadataS)
    status = Keyword()
    t = Keyword()
    x = Short(doc_values=False, index=False)
    y = Short(doc_values=False, index=False)
예제 #5
0
class RedditLinkMedia(InnerDoc):
    """Inner document declaring the fields of a link's media entry.

    Referenced by ``RedditLink`` for both ``media`` and ``secure_media``.
    It nests an ``oembed`` object (``RedditLinkMediaOEmbed``) and a
    ``reddit_video`` object (``RedditLinkMediaRedditVideo``).
    """

    content = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    event_id = Keyword()
    height = Short(doc_values=False, index=False)
    oembed = Object(RedditLinkMediaOEmbed)
    reddit_video = Object(RedditLinkMediaRedditVideo)
    type = Keyword()
    width = Short(doc_values=False, index=False)
예제 #6
0
class ReviewElastic(Document):
    """Elasticsearch document for a single review.

    Stored in the index named by the module-level ``INDEX_NAME``. ``date`` is
    declared with the ``date_hour_minute_second`` format and a default
    timezone of UTC.
    """

    business_id = Keyword()
    cool = Short()
    date = Date(default_timezone='UTC', format='date_hour_minute_second')
    funny = Short()
    review_id = Keyword()
    stars = Short()
    text = Text()
    useful = Short()
    user_id = Keyword()

    class Index:
        name = INDEX_NAME
예제 #7
0
class Position(DocType):
    """Document describing the position held in a single security."""

    # Security id
    securityId = Keyword()
    # Quantity held
    amount = Long()
    # Quantity available for trading
    availableAmount = Long()
    # Profit and loss
    profit = Float()
    # Market value
    value = Float()
    # Cost price
    cost = Float()
    # Trading type (0 means T+0, 1 means T+1)
    tradingT = Short()

    def __init__(self, meta=None, security_id=None, trading_t=1, **kwargs):
        """Create a position with all numeric fields set to zero.

        Args:
            meta: Passed through to the base ``DocType`` initializer.
            security_id: Value stored in ``securityId``.
            trading_t: Value stored in ``tradingT`` (defaults to 1).
            **kwargs: Passed through to the base initializer.

        Note:
            The assignments below run after the base initializer, so any
            value supplied through ``kwargs`` for one of these fields is
            overwritten.
        """
        super().__init__(meta, **kwargs)
        self.securityId = security_id
        self.availableAmount = 0
        self.amount = 0
        self.profit = 0
        self.value = 0
        self.cost = 0
        self.tradingT = trading_t
예제 #8
0
class Position(DocType):
    """Document describing the long and short position held in one security."""

    # Security id
    securityId = Keyword()

    # Long quantity
    longAmount = Long()
    # Long quantity that can be closed
    availableLong = Long()
    # Average long price
    averageLongPrice = Long()

    # Short quantity
    shortAmount = Long()
    # Short quantity that can be closed
    availableShort = Long()
    # Average short price
    averageShortPrice = Long()

    # Market value, or the margin occupied (for convenience, always 100%)
    value = Float()
    # Trading type (0 means T+0, 1 means T+1)
    tradingT = Short()

    def __init__(self, meta=None, security_id=None, trading_t=1, **kwargs):
        """Create a position with its quantities and value set to zero.

        Args:
            meta: Passed through to the base ``DocType`` initializer.
            security_id: Value stored in ``securityId``.
            trading_t: Value stored in ``tradingT`` (defaults to 1).
            **kwargs: Passed through to the base initializer.

        Note:
            The assignments below run after the base initializer, so any
            value supplied through ``kwargs`` for one of these fields is
            overwritten. ``averageLongPrice`` and ``averageShortPrice`` are
            not initialised here.
        """
        super().__init__(meta, **kwargs)
        self.securityId = security_id
        self.longAmount = 0
        self.availableLong = 0
        self.shortAmount = 0
        self.availableShort = 0

        # NOTE(review): ``profit`` is assigned here but is not declared as a
        # field on this class -- confirm whether a declaration is missing.
        self.profit = 0
        self.value = 0
        self.tradingT = trading_t
예제 #9
0
class TwitterEntitiesMedia(InnerDoc):
    """Inner document declaring the fields of a Twitter media entity.

    Most identifiers and URLs are declared with
    ``doc_values=False, index=False``; structured sub-parts (sizes, video
    info, extensions, ...) are nested ``Object`` fields backed by their own
    inner-document classes.
    """

    id = Long(doc_values=False, index=False)
    id_str = Keyword(doc_values=False, index=False)
    indices = Short(doc_values=False, index=False, multi=True)

    media_url = Keyword(doc_values=False, index=False)
    media_url_https = Keyword(doc_values=False, index=False)
    url = Keyword(doc_values=False, index=False)
    display_url = Keyword(doc_values=False, index=False)
    expanded_url = Keyword(doc_values=False, index=False)

    type = Keyword()
    original_info = Object(TwitterEntitiesMediaOriginalInfo)
    sizes = Object(TwitterEntitiesMediaSizes)
    source_status_id = Long(doc_values=False, index=False)
    source_status_id_str = Keyword()
    source_user_id = Long(doc_values=False, index=False)
    source_user_id_str = Keyword()
    video_info = Object(TwitterEntitiesMediaVideoInfo)
    features = Object(TwitterEntitiesMediaFeatures)  # {}?

    media_key = Keyword(doc_values=False, index=False)
    ext_media_availability = Object(TwitterEntitiesMediaExtMediaAvailability)
    ext_alt_text = Keyword(doc_values=False, index=False)
    ext_media_color = Object(TwitterExtensionsMediaColor)
    ext = Object(TwitterExtensions)
    additional_media_info = Object(TwitterEntitiesAdditionalMediaInfo)
예제 #10
0
class BusinessElastic(Document):
    """Elasticsearch document for a single business.

    Stored in the index named by the module-level ``INDEX_NAME``. ``city``
    and ``state`` are ``Text`` fields that additionally expose a ``raw``
    ``Keyword`` sub-field; ``location`` is a ``GeoPoint``.
    """

    business_id = Keyword()
    name = Text()
    address = Text()
    city = Text(fields={'raw': Keyword()})
    state = Text(fields={'raw': Keyword()})
    postal_code = Text()
    location = GeoPoint()
    stars = Short()
    review_count = Short()
    is_open = Boolean()
    categories = Keyword()
    # The capitalised names below presumably mirror attribute keys of the
    # source data -- TODO confirm.
    Ambience = Text()
    BusinessParking = Text()
    GoodForMeal = Text()

    class Index:
        name = INDEX_NAME
예제 #11
0
class RedditLinkMediaRedditVideo(InnerDoc):
    """Inner document declaring the fields of a Reddit-hosted video entry.

    Referenced as the ``reddit_video`` object of ``RedditLinkMedia``. URLs and
    pixel dimensions are declared with ``doc_values=False, index=False``.
    """

    dash_url = Keyword(doc_values=False, index=False)
    duration = Integer()
    fallback_url = Keyword(doc_values=False, index=False)
    height = Short(doc_values=False, index=False)
    hls_url = Keyword(doc_values=False, index=False)
    is_gif = Boolean()
    scrubber_media_url = Keyword(doc_values=False, index=False)
    transcoding_status = Keyword()
    # A pixel dimension, declared like ``height`` above and like every other
    # ``width`` field of the sibling media documents (was ``Boolean()``).
    width = Short(doc_values=False, index=False)
class PostType(Document):
    """Elasticsearch document for a post.

    Stored in the index named by the module-level ``ES_INDEX_NAME``. All
    fields are ``Text`` (including ``published_at`` and ``slug``) except
    ``minutes_to_read``, which is a ``Short``.
    """

    published_at = Text()
    title = Text()
    body = Text()
    excerpt = Text()
    slug = Text()
    minutes_to_read = Short()

    class Index:
        name = ES_INDEX_NAME
예제 #13
0
class RedditAwarding(InnerDoc):
    """Inner document declaring the fields of a Reddit awarding entry.

    ``description`` is a ``Text`` field using the module-level ``_INDEX_*``
    settings and the ``standard`` analyzer. ``start_date`` and ``end_date``
    use the project's ``RedditDate`` field type, and ``resized_icons`` is a
    nested list of ``RedditAwardingResizedIcon``.
    """

    award_type = Keyword()
    coin_price = Integer()
    coin_reward = Integer()
    count = Integer()
    days_of_drip_extension = Integer()
    days_of_premium = Integer()
    description = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    end_date = RedditDate()
    icon_height = Short(doc_values=False, index=False)
    icon_url = Keyword(doc_values=False, index=False)
    icon_width = Short(doc_values=False, index=False)
    id = Keyword()
    is_enabled = Boolean()
    name = Keyword()
    resized_icons = Nested(RedditAwardingResizedIcon)
    start_date = RedditDate()
    subreddit_coin_reward = Integer()
    subreddit_id = Keyword()
예제 #14
0
class Order(DocType):
    """Document describing a single trading order."""

    # Order id
    id = Keyword()
    # Trader id
    botName = Keyword()
    # Security id
    securityId = Keyword()
    # Buy/sell (long/short)
    direction = Short()
    # Market price / limit price
    type = Keyword()
    # Price
    price = Float()
    # Quantity
    amount = Long()
    # Status
    status = Keyword()
    # Time
    timestamp = Date()

    class Meta:
        # Mapping type name and a disabled ``all`` meta field.
        doc_type = 'doc'
        all = MetaField(enabled=False)
예제 #15
0
class TwitterEntitiesIndicesText(InnerDoc):
    """Inner document pairing a multi-valued ``indices`` field with a ``text``.

    Both fields are declared with ``doc_values=False, index=False``.
    """

    indices = Short(doc_values=False, index=False, multi=True)
    text = Keyword(doc_values=False, index=False)
예제 #16
0
class TwitterExtensionsMediaColorPaletteRgb(InnerDoc):
    """Inner document holding the ``red``, ``green`` and ``blue`` components.

    All three are ``Short`` fields declared with
    ``doc_values=False, index=False``.
    """

    red = Short(doc_values=False, index=False)
    green = Short(doc_values=False, index=False)
    blue = Short(doc_values=False, index=False)
예제 #17
0
class TwitterEntitiesUrl(InnerDoc):
    """Inner document declaring the fields of a Twitter URL entity.

    Unlike the sibling media entity, every field here uses the default
    indexing options.
    """

    url = Keyword()
    expanded_url = Keyword()
    display_url = Keyword()
    indices = Short(multi=True)
예제 #18
0
class TwitterEntitiesUserMention(InnerDoc):
    """Inner document declaring the fields of a Twitter user-mention entity.

    The numeric ``id`` and the ``indices`` are declared with
    ``doc_values=False, index=False``; ``id_str``, ``name`` and
    ``screen_name`` are regular ``Keyword`` fields.
    """

    id = Long(doc_values=False, index=False)
    id_str = Keyword()
    indices = Short(doc_values=False, index=False, multi=True)
    name = Keyword()
    screen_name = Keyword()
예제 #19
0
class RedditLink(RedditBaseDocument):
    """Elasticsearch document for a Reddit link (a submission/post).

    Extends ``RedditBaseDocument`` with the link-specific fields. Free-text
    fields (``title``, ``selftext``, ``link_flair_text``) are ``Text`` fields
    using the module-level ``_INDEX_*`` settings and the ``standard``
    analyzer; large or purely presentational payloads are declared with
    ``doc_values=False, index=False``.
    """

    domain = Keyword()
    url = Keyword()

    title = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    selftext = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    selftext_html = Keyword(doc_values=False, index=False)

    link_flair_background_color = Keyword(doc_values=False, index=False)
    link_flair_css_class = Keyword()
    link_flair_richtext = Nested(RedditFlairRichtext)
    link_flair_template_id = Keyword()
    link_flair_text = Text(
        index_options=_INDEX_OPTIONS,
        index_phrases=_INDEX_PHRASES,
        term_vector=_INDEX_TERM_VECTOR,
        analyzer="standard",
    )
    link_flair_text_color = Keyword(doc_values=False, index=False)
    link_flair_type = Keyword()

    media = Object(RedditLinkMedia)
    media_embed = Object(RedditLinkMediaEmbed)
    secure_media = Object(RedditLinkMedia)
    secure_media_embed = Object(RedditLinkMediaEmbed)
    preview = Object(RedditLinkPreview)
    thumbnail = Keyword(doc_values=False, index=False)
    thumbnail_width = Short(doc_values=False, index=False)
    thumbnail_height = Short(doc_values=False, index=False)

    collections = Nested(RedditLinkCollection)

    crosspost_parent = Keyword()
    # See documentation on this in RedditPost.from_dict().
    # crosspost_parent_list = Nested(RedditLink) # noqa: E800

    allow_live_comments = Boolean()
    brand_safe = Boolean()
    contest_mode = Boolean()
    disable_comments = Boolean()
    hide_score = Boolean()
    is_blank = Boolean()  # For sample, always False if it exists.
    is_crosspostable = Boolean()
    is_meta = Boolean()  # For sample, always None if it exists.
    is_original_content = Boolean()
    is_reddit_media_domain = Boolean()
    is_robot_indexable = Boolean()
    is_self = Boolean()
    is_video = Boolean()
    media_only = Boolean()
    over_18 = Boolean()
    pinned = Boolean()
    quarantine = Boolean()
    spoiler = Boolean()

    category = Keyword()
    content_categories = Keyword(multi=True)
    discussion_type = Keyword()  # For sample, always None if it exists.
    post_categories = Keyword(
        multi=True)  # For sample, always None if it exists.
    post_hint = Keyword()
    suggested_sort = Keyword()

    previous_visits = RedditDate(multi=True)
    view_count = Integer()  # For sample, always None if it exists.

    whitelist_status = Keyword()
    wls = Short()
    parent_whitelist_status = Keyword()
    pwls = Short()

    num_comments = Integer()
    num_crossposts = Integer()

    event_is_live = Boolean()
    event_start = RedditDate()
    event_end = RedditDate()

    # Promotion-related.
    call_to_action = Keyword()
    domain_override = Keyword()
    embed_type = Keyword()
    embed_url = Keyword()
    href_url = Keyword()
    mobile_ad_url = Keyword(doc_values=False, index=False)
    outbound_link = Object(RedditLinkOutboundLink)
    promoted = Boolean()
    promoted_by = Long()
    show_media = Boolean()
    third_party_trackers = Keyword(multi=True, doc_values=False, index=False)
    third_party_tracking = Keyword(doc_values=False, index=False)
    third_party_tracking_2 = Keyword(doc_values=False, index=False)

    # Log-in required.
    hidden = Boolean()
    clicked = Boolean()
    visited = Boolean()

    # Moderator required.
    ignore_reports = Boolean()  # For sample, always False if it exists.
    removed = Boolean()  # For sample, always False if it exists.
    spam = Boolean()  # For sample, always False if it exists.

    # No idea what these are.
    # There is a "from" field in the Reddit JSON sometimes. However, from is a keyword
    # in Python and therefore can't be used as an attribute name. I opened an issue on
    # this: https://github.com/elastic/elasticsearch-dsl-py/issues/1345
    # from = Keyword()  # noqa: E800  # For sample, always None if it exists.
    from_id = Keyword()  # For sample, always None if it exists.
    from_kind = Keyword()  # For sample, always None if it exists.

    @classmethod
    @overrides
    def prepare_doc_dict(cls, doc_dict: MutableMapping[str, object]) -> None:
        """Normalise a raw link dict in place before it becomes a document.

        Runs the base-class preparation, derives the document ``_id`` from
        the link's ``id`` and drops the embedded cross-post parent.

        Args:
            doc_dict: The raw JSON dict of the link. It is mutated in place
                and must contain a string ``id``.
        """
        super().prepare_doc_dict(doc_dict)
        # Reddit "fullnames" prefix the ID with the kind of thing: "t1_" is a
        # comment, "t3_" is a link. This document is a link, so use "t3_"
        # (the previous "t1_" gave links comment-style IDs).
        doc_dict["_id"] = "t3_" + checked_cast(str, doc_dict["id"])

        # "crosspost_parent_list" contains the whole JSON dict of the post this post
        # is cross-posting somewhere. For simplicity of the data model we discard this
        # here, at the cost of a single ID-lookup to the index should it be needed
        # later.
        doc_dict.pop("crosspost_parent_list", None)
예제 #20
0
class TwitterEntitiesMediaVideoInfo(InnerDoc):
    """Inner document declaring the video details of a Twitter media entity.

    ``aspect_ratio`` is multi-valued and ``variants`` is a nested list of
    ``TwitterEntitiesMediaVideoInfoVariant``.
    """

    aspect_ratio = Short(multi=True)
    duration_millis = Integer()
    variants = Nested(TwitterEntitiesMediaVideoInfoVariant)
예제 #21
0
class Politicians(Document):
    """Elasticsearch document for one candidate record.

    Documents are written to a per-year index whose name is derived from
    ``ano_eleicao`` (see ``set_index_name``). Most CSV columns are ``Text``
    fields with an additional ``keyword`` sub-field.
    """

    source = Nested(Source)
    unidade_eleitoral = Nested(UnidadeEleitoral)
    foto_url = Text()
    # CSV fields
    ano_eleicao = Integer(required=True)
    cd_cargo = Text(fields={'keyword': Keyword()})
    cd_cor_raca = Text(fields={'keyword': Keyword()})
    cd_estado_civil = Text(fields={'keyword': Keyword()})
    codigo_legenda = Text(fields={'keyword': Keyword()})
    cd_municipio_nascimento = Text(fields={'keyword': Keyword()})
    cd_nacionalidade = Text(fields={'keyword': Keyword()})
    cd_ocupacao = Text(fields={'keyword': Keyword()})
    cd_genero = Text(fields={'keyword': Keyword()})
    cd_grau_instrucao = Text(fields={'keyword': Keyword()})
    cd_situacao_candidatura = Text(fields={'keyword': Keyword()})
    cd_sit_tot_turno = Text(fields={'keyword': Keyword()})
    composicao_legenda = Text(fields={'keyword': Keyword()})
    nr_cpf_candidato = Text(fields={'keyword': Keyword()})
    dt_geracao = Date()
    dt_nascimento = Text(fields={'keyword': Keyword()})
    ds_cargo = Text(fields={'keyword': Keyword()})
    ds_cor_raca = Text(fields={'keyword': Keyword()})
    ds_eleicao = Text(fields={'keyword': Keyword()})
    ds_estado_civil = Text(fields={'keyword': Keyword()})
    ds_grau_instrucao = Text(fields={'keyword': Keyword()})
    ds_nacionalidade = Text(fields={'keyword': Keyword()})
    ds_ocupacao = Text(fields={'keyword': Keyword()})
    ds_genero = Text(fields={'keyword': Keyword()})
    nm_ue = Text(fields={'keyword': Keyword()})
    ds_sit_tot_turno = Text(fields={'keyword': Keyword()})
    nr_despesa_max_campanha = Text(fields={'keyword': Keyword()})
    ds_situacao_candidatura = Text(fields={'keyword': Keyword()})
    hr_geracao = Text(fields={'keyword': Keyword()})
    idade_data_eleicao = Text(fields={'keyword': Keyword()})
    nm_email = Text(fields={'keyword': Keyword()})
    nm_candidato = Text(fields={'keyword': Keyword()})
    nome_legenda = Text(fields={'keyword': Keyword()})
    nm_municipio_nascimento = Text(fields={'keyword': Keyword()})
    nm_partido = Text(fields={'keyword': Keyword()})
    nm_urna_candidato = Text(fields={'keyword': Keyword()})
    nr_candidato = Text(fields={'keyword': Keyword()})
    nr_partido = Text(fields={'keyword': Keyword()})
    nr_titulo_eleitoral_candidato = Text(fields={'keyword': Keyword()})
    nr_turno = Text(fields={'keyword': Keyword()})
    sq_candidato = Text(fields={'keyword': Keyword()})
    sigla_legenda = Text(fields={'keyword': Keyword()})
    sg_partido = Text(fields={'keyword': Keyword()})
    sg_ue = Text(fields={'keyword': Keyword()})
    sg_uf = Text(fields={'keyword': Keyword()})
    sg_uf_nascimento = Text(fields={'keyword': Keyword()})
    # 2018
    cd_detalhe_situacao_cand = Integer()
    cd_eleicao = Integer()
    cd_tipo_eleicao = Short()
    ds_detalhe_situacao_cand = Text(fields={'keyword': Keyword()})
    dt_eleicao = Text(fields={'keyword': Keyword()})
    nm_social_candidato = Text(fields={'keyword': Keyword()})
    nm_tipo_eleicao = Text(fields={'keyword': Keyword()})
    nr_idade_data_posse = Short()
    nr_processo = Text(fields={'keyword': Keyword()})
    nr_protocolo_candidatura = Text(fields={'keyword': Keyword()})
    st_declarar_bens = Text(fields={'keyword': Keyword()})
    st_reeleicao = Text(fields={'keyword': Keyword()})
    tp_abrangencia = Text(fields={'keyword': Keyword()})
    tp_agremiacao = Text(fields={'keyword': Keyword()})

    @classmethod
    def set_index_name(cls, year):
        """Return the index name for ``year``: ``<INDEX_NAME>-<year>``.

        Despite the name, nothing is set; the name is only computed.
        """
        return f'{INDEX_NAME}-{year}'

    def save(self, **kwargs):
        """Save the document into the index of its election year.

        Any ``index`` supplied by the caller is replaced by the name derived
        from ``self.ano_eleicao``; the remaining ``kwargs`` are forwarded to
        ``Document.save``.
        """
        kwargs['index'] = Politicians.set_index_name(self.ano_eleicao)
        return super(Politicians, self).save(**kwargs)

    @classmethod
    def bulk_save(cls, dicts):
        """Index many documents with one bulk request.

        Args:
            dicts: Iterable of document objects (each must provide
                ``to_dict`` and ``ano_eleicao``), not plain dicts.

        Returns:
            The result of the ``bulk`` helper, called with the default
            connection.
        """
        # Each action is the document's dict-with-meta, with ``_index``
        # overridden by the per-year index name.
        objects = (dict(d.to_dict(include_meta=True),
                        **{'_index': cls.set_index_name(int(d.ano_eleicao))})
                   for d in dicts)
        client = connections.get_connection()
        return bulk(client, objects)

    @classmethod
    def bulk_update(cls, dicts, client=None):
        """Upsert many documents with one bulk request.

        Args:
            dicts: Iterable of document objects (each must provide
                ``to_dict``).
            client: Connection to use; defaults to the default connection.

        Returns:
            The result of the ``bulk`` helper.
        """
        # NOTE(review): unlike ``bulk_save``, no per-year ``_index`` is
        # injected here, so each action only carries whatever index
        # ``to_dict`` emits -- confirm this is intended.
        def upsert(doc):
            # Turn the index-style action into an update action whose
            # ``doc`` is the former ``_source`` and that upserts if missing.
            d = doc.to_dict(True)
            d['_op_type'] = 'update'
            d['doc'] = d['_source']
            d['doc_as_upsert'] = True
            del d['_source']
            return d

        client = client or connections.get_connection()
        return bulk(client, (upsert(d) for d in dicts))
예제 #22
0
class TwitterEntitiesMediaOriginalInfo(InnerDoc):
    """Inner document declaring the original size of a Twitter media entity.

    ``height`` and ``width`` are declared with
    ``doc_values=False, index=False``; ``focus_rects`` is a nested list of
    ``TwitterEntitiesMediaRect``.
    """

    height = Short(doc_values=False, index=False)
    width = Short(doc_values=False, index=False)
    focus_rects = Nested(TwitterEntitiesMediaRect)
예제 #23
0
class TwitterEntitiesMediaRect(InnerDoc):
    """Inner document declaring the ``x``, ``y``, ``h`` and ``w`` of a rectangle.

    All four are ``Short`` fields declared with
    ``doc_values=False, index=False``.
    """

    x = Short(doc_values=False, index=False)
    y = Short(doc_values=False, index=False)
    h = Short(doc_values=False, index=False)
    w = Short(doc_values=False, index=False)
예제 #24
0
class TestEntry(DocType):
    """Document holding one stock quote plus portfolio scoring values.

    Stored in the ``portfolio`` index. Several derived fields are recomputed
    in ``save()`` before the document is written.
    """

    timestamp = Date()
    symbol = Text(analyzer='standard', fields={'raw': Keyword()})
    AverageDailyVolume = Long()
    BookValue = Double()
    Change_PercentChange = Text(analyzer='standard')
    Change = Double()
    Currency = Text(analyzer='standard', fields={'raw': Keyword()})
    DividendShare = Double()
    LastTradeDate = Date()
    EarningsShare = Double()
    EPSEstimateCurrentYear = Double()
    EPSEstimateNextYear = Double()
    EPSEstimateNextQuarter = Double()
    DaysLow = Double()
    DaysHigh = Double()
    YearLow = Double()
    YearHigh = Double()
    MarketCapitalization = Text(analyzer='standard')
    EBITDA = Text(analyzer='standard')
    ChangeFromYearLow = Double()
    PercebtChangeFromYearLow = Text(analyzer='standard')
    LastTradeWithTime = Text(analyzer='standard')
    LastTradePriceOnly = Double()
    DaysRange = Text(analyzer='standard')
    FiftydayMovingAverage = Double()
    TwoHundreddayMovingAverage = Double()
    ChangeFromTwoHundreddayMovingAverage = Double()
    PercentChangeFromTwoHundreddayMovingAverage = Text(analyzer='standard')
    ChangeFromFiftydayMovingAverage = Double()
    PercentChangeFromFiftydayMovingAverage = Text(analyzer='standard')
    Name = Text(analyzer='standard', fields={'raw': Keyword()})
    Open = Double()
    PreviousClose = Double()
    ChangeinPercent = Text(analyzer='standard')
    PriceSales = Double()
    PriceBook = Double()
    ExDividendDate = Date()
    PERatio = Double()
    DividendPayDate = Date()
    PEGRatio = Double()
    PriceEPSEstimateCurrentYear = Double()
    PriceEPSEstimateNextYear = Double()
    ShortRatio = Double()
    LastTradeTime = Date()
    OneyrTargetPrice = Double()
    Volume = Long()
    YearRange = Text(analyzer='standard')
    StockExchange = Text(analyzer='standard', fields={'raw': Keyword()})
    DividendYield = Double()
    PercentChange = Text(analyzer='standard')
    RiskModelScoreInit = Short()
    RiskModelScoreCurrent = Short()
    PEGScoreInit = Short()
    PEGScoreCurrent = Short()
    DivScoreInit = Short()
    DivScoreCurrent = Short()
    ShortScoreInit = Short()
    ShortScoreCurrent = Short()
    PriceScoreInit = Short()
    # NOTE(review): save() reads ``PriceScoreCurrent``, which is not declared
    # on this class; this ``PriceStoreCurrent`` is the closest declared name
    # and looks like a typo -- confirm before renaming (it affects the mapping).
    PriceStoreCurrent = Short()
    OverseasScoreInit = Short()
    OverseasScoreCurrent = Short()
    DivYieldInit = Double()
    NumOfSharesInit = Double()
    NumOfSharesCurrent = Double()
    PriceInit = Double()
    TotalValueInit = Double()
    TotalValueCurrent = Double()
    PercentOfTotalPortfolioValue = Double()
    PercentGainSinceInception = Double()
    TotalPortfolioValueInit = Double()
    TotalPortfolioValueCurrent = Double()
    TotalPortfolioGainSinceInception = Double()
    stored_at = Date()

    class Meta:
        index = 'portfolio'

    def save(self, ** kwargs):
        """Recompute the derived fields, stamp ``stored_at`` and save.

        The inputs read below must already be set to numbers on the
        instance. NOTE(review): none of the divisions guard against a zero
        denominator -- confirm callers never pass zero totals.

        Args:
            **kwargs: Forwarded unchanged to ``DocType.save``.

        Returns:
            Whatever ``DocType.save`` returns.
        """
        # Current risk score is the sum of the five current component scores.
        self.RiskModelScoreCurrent = self.PEGScoreCurrent + self.DivScoreCurrent + self.ShortScoreCurrent + self.PriceScoreCurrent + self.OverseasScoreCurrent

        # Current value of the holding: shares held times last trade price.
        self.TotalValueCurrent = self.NumOfSharesCurrent * self.LastTradePriceOnly

        # Share of the whole portfolio's current value (a fraction, not x100).
        self.PercentOfTotalPortfolioValue = self.TotalValueCurrent / self.TotalPortfolioValueCurrent

        # Relative gain of this holding against its initial value.
        self.PercentGainSinceInception = (self.TotalValueCurrent - self.TotalValueInit) / self.TotalValueInit

        # Relative gain of the whole portfolio against its initial value.
        self.TotalPortfolioGainSinceInception = (self.TotalPortfolioValueCurrent - self.TotalPortfolioValueInit) / self.TotalPortfolioValueInit

        # Local wall-clock time without timezone information.
        self.stored_at = datetime.datetime.now()
        return super(TestEntry, self).save(** kwargs)
class Recipe(Document):
    """Python representation of a Recipe document in Elasticsearch.

    Args:
        name: A string, the recipe title.
        ingredients: A list of strings, the ingredients of the recipe.
        url: A string, the URL from where the recipe was sourced
        source: A string, the original publisher of the recipe

        calories: An int, the calorie count of the recipe. randint(0, 1400)
        carbohydrate: An int, the carb count of the recipe. randint(0, 75)
        fat: An int, the fat count of the recipe. randint(0, 100)
        protein: An int, the protein count of the recipe. randint(0, 50)

        image: A optional string, the URL for an image of the recipe
        cookTime: An optional string, the cook time
        recipeYield: An optional string, the recipe yield
        datePublished: An optional string, the original publish date
        prepTime: An optional string, the prep time
        description: An optional string, the recipe pretext/description
        totalTime: An optional string, the total cook/prep time
        creator: An optional string, the original author of the recipe
        recipeCategory: An optional string, the type of recipe
        recipeInstructions: An optional string, the recipe instructions
        tags: An optional string array containing any of ["vegetarian", "vegan", "gluten-free"]
    """

    # These fields should be identical to those in recipe-db/loading-scripts/recipe-mapping.json
    name = Text(fields={"keyword": Keyword()})
    ingredients = Text(fields={"keyword": Keyword()})
    url = Text(fields={"keyword": Keyword()})
    source = Text(fields={"keyword": Keyword()})
    calories = Short()
    carbohydrate = Short()
    fat = Short()
    protein = Short()
    image = Text(fields={"keyword": Keyword()})
    cookTime = Text(fields={"keyword": Keyword()})
    recipeYield = Text(fields={"keyword": Keyword()})
    datePublished = Text(fields={"keyword": Keyword()})
    prepTime = Text(fields={"keyword": Keyword()})
    description = Text(fields={"keyword": Keyword()})
    totalTime = Text(fields={"keyword": Keyword()})
    creator = Text(fields={"keyword": Keyword()})
    recipeCategory = Text(fields={"keyword": Keyword()})
    recipeInstructions = Text(fields={"keyword": Keyword()})
    tags = Text(fields={"keyword": Keyword()})

    # The Index inner class is where we define connection config
    class Index:
        name = "recipes"

    @classmethod
    def _get_using(cls, using=None):
        """Override base method for specifying our current Elasticsearch connection"""
        return current_app.elasticsearch

    def get_image_url(self, use_google=False):
        """Return a URL for an image of this recipe.
        It will try to return the OpenRecipes scraped image if it exists,
        else it will do a Google image search, else it will return a default
        placeholder image.

        Args:
            use_google: If true, will make an API call to Google images for missing
                images, else it will skip this step (for API quota purposes).

        Returns:
            A string URL which can be GET requested to obtain an image
        """
        # First try the OpenRecipes image
        # NOTE(review): no ``timeout`` is passed, so this request can block for
        # as long as the remote server keeps the connection open.
        try:
            response = requests.head(self.image, allow_redirects=True)

            if response.status_code == 200:
                return self.image
        except Exception:
            # Best effort: e.g. timeout, missing or malformed image URL
            pass

        # Then try the first Google Image search result
        if use_google:
            try:
                google_image_search = GoogleImagesSearch(None, None)

                google_image_search.search(
                    search_params={
                        "q": self.name,
                        "num": 1,
                    }
                )

                return google_image_search.results()[0].url
            except Exception:
                # Best effort: e.g. API quota limit reached, no results
                pass

        # Else return our default image
        return url_for("static", filename="images/default_recipe_image.jpg")

    # THESE ARE SAMPLE METHODS FOR YOU TO GET DATA FROM

    @classmethod
    def get_single_recipe(cls):
        """Return a single Recipe object from Elasticsearch

        Returns:
            A Recipe object.
        """
        return cls.search().execute()[0]

    @classmethod
    def get_multi_recipe_paged(cls, page=0, per_page=10):
        """Return a list of Recipes, considering pagination

        Usage:

            >>> # Default options just gets you the first 10 recipes
            >>> recipes_0 = Recipe.get_multi_recipe_paged() #page=0
            >>> # Get next set of results by specifying the page
            >>> recipes_1 = Recipe.get_multi_recipe_paged(page=1)
            >>> # Get more results by changing page size
            >>> recipes_0_4 = Recipe.get_multi_recipe_paged(per_page=50)

        Args:
            page: The page of results to get
            per_page: The size of each page of results to get

        Returns:
            A list of Recipe object
        """
        return list(cls.search()[page * per_page : (page + 1) * per_page].execute())

    # TODO: CUSTOM SEARCH METHODS
    @classmethod
    def get_recipe_by_id(cls, recipe_id):
        """Return a single Recipe object from Elasticsearch by its ID

        Args:
            recipe_id: The ID of the recipe to get.

        Returns:
            The Recipe object corresponding to the given ID, or None if not found
            (any error raised by the lookup is also reported as None)
        """
        try:
            return cls.get(recipe_id)
        except Exception:
            return None

    @classmethod
    def get_recipes_by_criteria(cls, page=0, per_page=10, **criteria):
        """Advanced search wrapper for Recipes.

        An example set of criteria is as follows:
        e.g. criteria = {
            "query": "dip",
            "ingredients": "olive oil, garlic",
            "tags": ["gluten-free", "vegetarian"],
            "minCalories": 0,
            "maxCalories": 100,
            "minCarbs": 0,
            "maxCarbs": 100,
            "minProteins": 0,
            "maxProteins": 100,
            "minFats": 0,
            "maxFats": 100,
        }
        Note that all of the items are optional and will be ignored if omitted or
        if falsy values are provided (e.g. False, None, [], {}, "", 0)

        Usage::
            >>> # e.g. direct kwargs
            >>> Recipe.get_recipes_by_criteria(query="dip", tags=["vegetarian"]).execute()
            >>> # e.g. splat kwargs
            >>> criteria = {"query": "dip", "tags": ["vegetarian"]}
            >>> Recipe.get_recipes_by_criteria(**criteria).execute()

        Args:
            page: The page of results to get
            per_page: The size of each page of results to get
            criteria: kwargs of the below
                query: The recipe name to (partly) match
                ingredients: List of ingredients the recipe should contain (any),
                    or a single comma-separated string of them
                tags: List of tags the recipe should match (all of them)
                minCalories / maxCalories: Inclusive bounds on ``calories``
                minCarbs / maxCarbs: Inclusive bounds on ``carbohydrate``
                minProteins / maxProteins: Inclusive bounds on ``protein``
                minFats / maxFats: Inclusive bounds on ``fat``

        Returns:
            An elasticsearch_dsl.Search object, which you can get a list
            of recipes out of by doing list(search_object.execute())
        """
        search = cls.search()[page * per_page : (page + 1) * per_page]

        query = criteria.get("query")
        if query:
            search = search.query(Q("fuzzy", name=query) | Q("match", name=query))

        ingredients = criteria.get("ingredients")
        if ingredients:
            if isinstance(ingredients, str):
                ingredients = [i.strip() for i in ingredients.split(",")]
            search = search.query("terms", ingredients=ingredients)

        tags = criteria.get("tags")
        if tags:
            # Require every requested tag: the script makes the minimum number
            # of matching terms equal to the number of terms supplied.
            search = search.filter(
                "terms_set",
                tags__keyword={
                    "terms": tags,
                    "minimum_should_match_script": {"source": "params.num_terms"},
                },
            )

        # (criteria key, document field, range operator). Each nutrient bound
        # filters its own field; previously the carb, protein and fat bounds
        # were all applied to ``calories`` by mistake.
        range_criteria = (
            ("minCalories", "calories", "gte"),
            ("maxCalories", "calories", "lte"),
            ("minCarbs", "carbohydrate", "gte"),
            ("maxCarbs", "carbohydrate", "lte"),
            ("minProteins", "protein", "gte"),
            ("maxProteins", "protein", "lte"),
            ("minFats", "fat", "gte"),
            ("maxFats", "fat", "lte"),
        )
        for key, field, operator in range_criteria:
            bound = criteria.get(key)
            if bound:
                search = search.filter("range", **{field: {operator: bound}})

        return search

    @classmethod
    def get_recipe_suggestions(cls, prefix):
        """Build a search for recipes whose name starts with ``prefix``.

        Args:
            prefix: The (partial) recipe name typed so far.

        Returns:
            An un-executed elasticsearch_dsl.Search object matching ``name``
            by either a ``match_phrase_prefix`` or a ``prefix`` query.
        """
        search = cls.search()
        search = search.query(
            Q("match_phrase_prefix", name=prefix) | Q("prefix", name=prefix)
        )
        return search
예제 #26
0
class DocTestSSLResult(Document):
    """Document describing the outcome of one TLS/SSL scan, filled from CSV rows.

    The CSV rows are expected to carry the columns ``id``, ``fqdn/ip``,
    ``port``, ``severity``, ``finding``, ``cve`` and ``cwe`` (see
    ``parseCSV``).
    NOTE(review): the row ids look like testssl.sh output -- confirm.
    """

    source = Text(fields={'raw': Keyword()})
    result = Boolean()
    timestamp = Date()
    ip = Keyword()
    hostname = Keyword()
    port = Integer()
    svcid = Keyword()  # "<ip>:<port>", computed in save()
    protocols = Keyword(multi=True)
    ciphers = Text(multi=True, fields={'raw': Keyword()})
    ciphertests = Keyword(multi=True)
    serverpref = Object(
        properties={
            "cipher_order": Boolean(),
            "protocol": Keyword(),
            "cipher": Text(fields={'raw': Keyword()})
        })
    cert = Object(
        properties={
            "keysize": Short(),
            "signalgo": Text(fields={'raw': Keyword()}),
            "md5_fingerprint": Keyword(),
            "sha1_fingerprint": Keyword(),
            "sha256_fingerprint": Keyword(),
            "cn": Text(fields={'raw': Keyword()}),
            "san": Text(multi=True, fields={'raw': Keyword()}),
            "issuer": Text(fields={'raw': Keyword()}),
            "ev": Boolean(),
            "expiration": Date(),
            "ocsp_uri": Text(fields={'raw': Keyword()}),
            "Crl_url": Text(fields={'raw': Keyword()}),
            "ocsp_stapling": Boolean(),
        })
    vulnerabilities = Keyword(multi=True)

    def parseCSVLine(self, line):
        """Update this document from a single CSV row.

        :param line: mapping with at least the keys ``id``, ``fqdn/ip``,
            ``port``, ``severity`` and ``finding``.

        A row whose ``id`` is the literal ``"id"`` (the header row) is
        ignored. Rows whose ``id`` matches none of the branches below leave
        the document unchanged, apart from the host/port initialisation.
        """
        if line['id'] == "id":
            return

        if not self.ip or not self.hostname or not self.port:  # host, ip and port
            m = reIpHostColumn.search(line['fqdn/ip'])
            if m:
                self.hostname, self.ip = m.groups()
            self.port = int(line['port'])

        if reProtocol.search(line['id']) and reOffers.search(
                line['finding']):  # protocols
            # At least one offered protocol marks the scan as successful.
            self.result = True
            m = reProtocol.search(line['id'])
            if m:
                self.protocols.append(line['id'].upper())

        elif reCipherColumnName.search(line['id']):  # ciphers
            m = reCipherDetails.search(line['finding'])
            if m:
                self.ciphers.append(m.group(1))

        elif reCipherTests.search(line['id']) and reVulnerable.search(
                line['finding']):  # cipher tests
            m = reCipherTests.search(line['id'])
            if m:
                self.ciphertests.append(m.group(1))

        # This starts a second, independent if/elif chain: it is evaluated
        # even when one of the branches above already matched.
        if line['id'] == "cipher_order":  # server prefers cipher
            self.serverpref.cipher_order = bool(reOk.search(line['severity']))

        elif line['id'] == "protocol_negotiated":  # preferred protocol
            m = reDefaultProtocol.search(line['finding'])
            if m:
                self.serverpref.protocol = m.group(1)

        elif line['id'] == "cipher_negotiated":  # preferred cipher
            m = reDefaultCipher.search(line['finding'])
            if m:
                self.serverpref.cipher = m.group(1)

        elif line['id'] == "cert_keySize":  # certificate key size
            m = reKeySize.search(line['finding'])
            if m:
                self.cert.keysize = int(m.group(1))

        elif line['id'] == "cert_signatureAlgorithm":  # certificate sign algorithm
            m = reSignAlgorithm.search(line['finding'])
            if m:
                self.cert.signalgo = m.group(1)

        elif line['id'] == "cert_fingerprintSHA1":  # certificate fingerprint SHA1
            m = reFPSHA1.search(line['finding'])
            if m:
                self.cert.sha1_fingerprint = m.group(1)

        elif line['id'] == "cert_fingerprintSHA256":  # certificate fingerprint SHA256
            m = reFPSHA256.search(line['finding'])
            if m:
                self.cert.sha256_fingerprint = m.group(1)

        elif line['id'] == "cert_fingerprintMD5":  # certificate fingerprint MD5
            m = reFPMD5.search(line['finding'])
            if m:
                self.cert.md5_fingerprint = m.group(1)

        elif line['id'] == "cert_commonName":  # certificate CN
            m = reCN.search(line['finding'])
            if m:
                self.cert.cn = m.group(1)

        elif line['id'] == "cert_subjectAltName":  # certificate SAN
            m = reSAN.search(line['finding'])
            if m:
                # TODO: the matched group is stored as one string although
                # the field is declared multi-valued; splitting into
                # individual names still needs revision.
                self.cert.san = m.group(1)

        elif line['id'] == "cert_caIssuers":  # certificate issuer
            m = reIssuer.search(line['finding'])
            if m:
                self.cert.issuer = m.group(1)

        elif line['id'] == "ev":  # certificate extended validation (unverified)
            self.cert.ev = bool(reYes.search(line['finding']))

        elif line['id'] == "cert_notAfter":  # certificate expiration
            m = reExpiration.search(line['finding'])
            if m:
                unparsedDate = m.group(1)
                self.cert.expiration = datetime.strptime(
                    unparsedDate, "%Y-%m-%d %H:%M")

        elif line['id'] == "cert_ocspURL":  # certificate OCSP URI
            m = reOCSPURI.search(line['finding'])
            if m:
                self.cert.ocsp_uri = m.group(1)
            else:
                # "-" is stored as a placeholder when no URI is found.
                self.cert.ocsp_uri = "-"

        elif line['id'] == "cert_crlDistributionPoints":  # certificate CRL
            m = reAll.search(line['finding'])
            if m:
                self.cert.Crl_url = m.group(1)
            else:
                self.cert.Crl_url = "-"

        elif line['id'] == "OCSP_stapling":  # certificate OCSP stapling
            self.cert.ocsp_stapling = not bool(
                reNotOffered.search(line['finding']))

        elif line['id'] in ("heartbleed", "CCS", "secure_renego",
                            "secure_client_renego", "CRIME_TLS", "SWEET32",
                            "POODLE_SSL", "fallback_SCSV", "FREAK", "DROWN",
                            "LOGJAM", "BEAST", "LUCKY13",
                            "RC4") and reVulnerable.search(line['severity']):
            m = reVulnerable.search(line['severity'])
            # Only record the check when the captured group is non-empty.
            if str(m.group(1)) != '':
                self.vulnerabilities.append(line['id'].upper())

    def parseCSV(self, csvfile):
        """Parse a whole CSV file object into this document.

        If ``self.source`` is set and matches ``reDefaultFilename``, the IP,
        port and timestamp are taken from its named groups first. Every row
        of *csvfile* is then passed to ``parseCSVLine``.

        :param csvfile: iterable of CSV text lines (e.g. an open file).
        """
        if self.source:
            m = reDefaultFilename.search(self.source)
            if m:
                self.ip = m.group('ip')
                self.port = int(m.group('port') or 0)
                self.timestamp = datetime.strptime(m.group('datetime'),
                                                   "%Y%m%d-%H%M")
        csvReader = csv.DictReader(csvfile,
                                   fieldnames=("id", "fqdn/ip", "port",
                                               "severity", "finding", "cve",
                                               "cwe"),
                                   delimiter=',',
                                   quotechar='"')
        for line in csvReader:
            self.parseCSVLine(line)

    def save(self, **kwargs):
        """Fill in derived fields and persist the document.

        :param kwargs: ``debug`` (truthy) pretty-prints the document before
            saving; all remaining keyword arguments are forwarded to the
            parent ``save``.
        :raises ValueError: if no port was set, i.e. nothing was parsed.
        :return: whatever the parent ``save`` returns.
        """
        # 'debug' is our own flag and must not reach the parent save().
        debug = kwargs.pop('debug', False)

        if not self.timestamp:
            self.timestamp = datetime.now(tz)
        if not self.port:
            raise ValueError("Empty scan result")

        self.svcid = "%s:%d" % (self.ip, int(self.port) or 0)
        if not self.result:
            # Normalise an unset result to an explicit False.
            self.result = False

        if debug:
            pp.pprint(self.to_dict())
        return super().save(**kwargs)
예제 #27
0
class DocHTTPRequestResponse(DocType):
    """Document describing one HTTP request together with its response.

    Besides the raw request/response data, the document keeps flat
    ``*names`` lists (header, parameter and cookie names) next to the nested
    name/value objects; the ``add_*`` helpers keep both in sync.
    """

    class Meta:
        doc_type = 'HTTPRequestResponse'

    timestamp = Date()
    protocol = Text()
    host = Keyword()
    port = Integer()
    request = Object(
        properties={
            'method':
            Keyword(),
            'url':
            Text(fields={'keyword': Keyword()}),
            'requestline':
            Text(fields={'keyword': Keyword()}),
            'content_type':
            Text(fields={'keyword': Keyword()}),
            'headernames':
            Text(analyzer=identifierAnalyzer,
                 multi=True,
                 fields={'keyword': Keyword()}),
            'headers':
            Nested(
                properties={
                    'name':
                    Text(analyzer=identifierAnalyzer,
                         fields={'keyword': Keyword()}),
                    'value':
                    Text(fields={'keyword': Keyword()})
                }),
            'parameternames':
            Text(analyzer=identifierAnalyzer,
                 multi=True,
                 fields={'keyword': Keyword()}),
            'parameters':
            Nested(
                properties={
                    'type':
                    Keyword(),
                    'name':
                    Text(analyzer=identifierAnalyzer,
                         fields={'keyword': Keyword()}),
                    'value':
                    Text(fields={'keyword': Keyword()})
                }),
            'body':
            Text(include_in_all=False)
        })
    response = Object(
        properties={
            'status':
            Short(),
            'responseline':
            Text(fields={'keyword': Keyword()}),
            'content_type':
            Text(fields={'keyword': Keyword()}),
            'inferred_content_type':
            Text(fields={'keyword': Keyword()}),
            'headernames':
            Text(analyzer=identifierAnalyzer,
                 multi=True,
                 fields={'keyword': Keyword()}),
            'headers':
            Nested(
                properties={
                    'name':
                    Text(analyzer=identifierAnalyzer,
                         fields={'keyword': Keyword()}),
                    'value':
                    Text(fields={'keyword': Keyword()})
                }),
            'cookienames':
            Text(analyzer=identifierAnalyzer,
                 multi=True,
                 fields={'keyword': Keyword()}),
            'cookies':
            Nested(
                properties={
                    'domain':
                    Text(fields={'keyword': Keyword()}),
                    'expiration':
                    Date(fields={'keyword': Keyword()}),
                    'name':
                    Text(analyzer=identifierAnalyzer,
                         fields={'keyword': Keyword()}),
                    'path':
                    Text(fields={'keyword': Keyword()}),
                    'value':
                    Text(fields={'keyword': Keyword()})
                }),
            'body':
            Text(include_in_all=False),
            # The fields below are filled from the parsed HTML body in save().
            'doctype':
            Text(multi=True, fields={'keyword': Keyword()}),
            'base':
            Text(multi=True, fields={'keyword': Keyword()}),
            'stylesheets':
            Text(multi=True, fields={'keyword': Keyword()}),
            'frames':
            Text(multi=True, fields={'keyword': Keyword()}),
            'scripts':
            Text(multi=True, fields={'keyword': Keyword()}),
            'links':
            Text(multi=True, fields={'keyword': Keyword()}),
            'images':
            Text(multi=True, fields={'keyword': Keyword()}),
            'audio':
            Text(multi=True, fields={'keyword': Keyword()}),
            'video':
            Text(multi=True, fields={'keyword': Keyword()}),
            'objects':
            Text(multi=True, fields={'keyword': Keyword()}),
            'formactions':
            Text(multi=True, fields={'keyword': Keyword()}),
            'extrefs':
            Text(multi=True, fields={'keyword': Keyword()
                                     }),  # all external references
        })

    def add_request_header(self, header):
        """Parse a raw request header with ``parse_header`` and record it."""
        parsed = parse_header(header)
        self.request.headers.append(parsed)
        self.request.headernames.append(parsed['name'])

    def add_response_header(self, header):
        """Parse a raw response header with ``parse_header`` and record it."""
        parsed = parse_header(header)
        self.response.headers.append(parsed)
        self.response.headernames.append(parsed['name'])

    def add_parsed_request_header(self, name, value):
        """Record an already split request header (name and value)."""
        self.request.headers.append({"name": name, "value": value})
        self.request.headernames.append(name)

    def add_parsed_response_header(self, name, value):
        """Record an already split response header (name and value)."""
        self.response.headers.append({"name": name, "value": value})
        self.response.headernames.append(name)

    def add_request_parameter(self, typename, name, value):
        """Record a request parameter and its name."""
        param = {'type': typename, 'name': name, 'value': value}
        self.request.parameters.append(param)
        self.request.parameternames.append(param['name'])

    def add_response_cookie(self,
                            name,
                            value,
                            domain=None,
                            path=None,
                            expiration=None):
        """Record a response cookie and its name."""
        cookie = {
            'name': name,
            'value': value,
            'domain': domain,
            'path': path,
            'expiration': expiration
        }
        self.response.cookies.append(cookie)
        self.response.cookienames.append(cookie['name'])

    def _response_is_html(self):
        """Return True when the response should be treated as HTML.

        The inferred content type wins when it is set; the declared content
        type is consulted only as a fallback. An unset declared content type
        counts as "not HTML" instead of raising ``TypeError``.
        """
        inferred = self.response.inferred_content_type
        if inferred:
            return inferred == "HTML"
        declared = self.response.content_type or ""
        return "HTML" in declared or "html" in declared

    def save(self, storeResponseBody=True, **kwargs):
        """Extract HTML references from the response body and persist.

        :param storeResponseBody: when false, the response body is dropped
            (set to ``None``) after the HTML extraction and before saving.
        :param kwargs: forwarded unchanged to the parent ``save``.
        :return: whatever the parent ``save`` returns.
        """
        if not self.timestamp:
            self.timestamp = datetime.now(
                tz
            )  # TODO: timestamp options: now (as is), request and response
        if self.response.body and self._response_is_html():
            parser = WASEHTMLParser()
            parser.feed(self.response.body)
            parser.close()

            self.response.doctype = list(parser.doctype)
            self.response.base = list(parser.base)
            self.response.stylesheets = list(parser.stylesheets)
            self.response.frames = list(parser.frames)
            self.response.scripts = list(parser.scripts)
            self.response.links = list(parser.links)
            self.response.images = list(parser.images)
            self.response.audio = list(parser.audio)
            self.response.video = list(parser.video)
            self.response.objects = list(parser.objects)
            self.response.formactions = list(parser.formactions)
            self.response.extrefs = list(parser.extrefs)

        if not storeResponseBody:
            self.response.body = None
        return super(DocHTTPRequestResponse, self).save(**kwargs)
예제 #28
0
class TwitterEntitiesMediaSize(InnerDoc):
    """Inner document with the fields ``h``, ``w`` and ``resize``.

    Every field is declared with ``index=False`` and ``doc_values=False``.
    """

    h = Short(index=False, doc_values=False)
    w = Short(index=False, doc_values=False)
    resize = Keyword(index=False, doc_values=False)
예제 #29
0
class CardNameIndex(Document):
    """Document for one row of the name card index ('namenindex')."""

    # Text columns copied from the CSV row; norms are disabled on all of them.
    datum = Text(norms=False)
    naam = Text(norms=False)
    inhoud = Text(norms=False)
    bron = Text(norms=False)
    getuigen = Text(norms=False)
    bijzonderheden = Text(norms=False)

    # Values derived from ``naam`` and ``datum`` in from_csv_line().
    naam_keyword = Keyword()
    jaar = Short()

    class Index:
        name = 'namenindex'

        def __new__(cls):
            return Index(name=cls.name)

    @classmethod
    def from_csv_line(cls, line: List[str]) -> 'CardNameIndex':
        """Build a document from one CSV row.

        Column 0 is the id; columns 1-6 map to datum, naam, inhoud, bron,
        getuigen and bijzonderheden. A row with an empty id yields an empty
        document. The derived fields are only filled for valid documents.
        """
        doc = cls()
        row_id = line[0]
        if not row_id:
            return doc
        doc.meta.id = int(row_id)

        columns = ('datum', 'naam', 'inhoud', 'bron', 'getuigen',
                   'bijzonderheden')
        for position, column in enumerate(columns, start=1):
            setattr(doc, column, cls.parse_entry(line[position]))

        if doc.is_valid():
            if doc.naam is not None:
                doc.naam_keyword = cls.create_name_keyword(str(doc.naam))
            if doc.datum is not None:
                doc.jaar = cls.create_year(str(doc.datum))
        return doc

    def is_valid(self):
        """Tell whether the document has an id and a name or a date."""
        # Empty trailing lines of a file produce documents without an id.
        if getattr(self.meta, 'id', None) is None:
            return False
        # Rows holding nothing but an id are common at the end of a file.
        return not (self.naam is None and self.datum is None)

    @staticmethod
    def parse_entry(entry: str) -> Optional[str]:
        """Strip surrounding whitespace; an empty result becomes None."""
        stripped = entry.strip()
        return stripped if stripped else None

    @staticmethod
    def create_name_keyword(naam: str) -> str:
        """Get a single keyword from the name field.

        The part before the first ',' is used; failing that, the part before
        the first '~', then before the first space, else the whole name.
        """
        # todo: fix this one: Albrecht (St), van
        for separator in (',', '~', ' '):
            head, found, _ = naam.partition(separator)
            if found:
                return head
        return naam

    @staticmethod
    def create_year(datum: str) -> Optional[int]:
        """Parse a year from the datum field.

        Only a leading four-digit number strictly between 1000 and 2000 is
        accepted; anything else gives None.
        """
        if datum is None:
            return None
        prefix = datum[:4]
        if len(prefix) < 4 or not prefix.isdigit():
            return None
        jaar = int(prefix)
        return jaar if 1000 < jaar < 2000 else None
예제 #30
0
class DocTestSSLResult(DocType):
    """Document describing the outcome of one TLS/SSL scan, filled from CSV rows.

    The CSV rows are expected to carry the columns ``id``, ``host``,
    ``port``, ``severity`` and ``finding`` (see ``parseCSV``).
    NOTE(review): the row ids look like testssl.sh output -- confirm.
    """

    class Meta:
        doc_type = "TestSSLResult"

    source = String(fields={'raw': String(index='not_analyzed')})
    result = Boolean()
    timestamp = Date()
    ip = String(index='not_analyzed')
    hostname = String(index='not_analyzed')
    port = Integer()
    svcid = String(index='not_analyzed')  # "<ip>:<port>", computed in save()
    protocols = String(index='not_analyzed', multi=True)
    ciphers = String(multi=True, fields={'raw': String(index='not_analyzed')})
    ciphertests = String(index='not_analyzed', multi=True)
    serverpref = Object(
            properties = {
                "cipher_order": Boolean(),
                "protocol": String(index='not_analyzed'),
                "cipher": String(fields={'raw': String(index='not_analyzed')})
                })
    cert = Object(
            properties = {
                "keysize": Short(),
                "signalgo": String(fields={'raw': String(index='not_analyzed')}),
                "md5_fingerprint": String(index='not_analyzed'),
                "sha1_fingerprint": String(index='not_analyzed'),
                "sha256_fingerprint": String(index='not_analyzed'),
                "cn": String(fields={'raw': String(index='not_analyzed')}),
                "san": String(multi=True, fields={'raw': String(index='not_analyzed')}),
                "issuer": String(fields={'raw': String(index='not_analyzed')}),
                "ev": Boolean(),
                "expiration": Date(),
                "ocsp_uri": String(fields={'raw': String(index='not_analyzed')}),
                "ocsp_stapling": Boolean(),
                })
    vulnerabilities = String(index='not_analyzed', multi=True)

    def parseCSVLine(self, line):
        """Update this document from a single CSV row.

        :param line: mapping with at least the keys ``id``, ``host``,
            ``port`` and ``finding``.

        A row whose ``id`` is the literal ``"id"`` (the header row) is
        ignored. Apart from the host/port initialisation, at most one branch
        of the if/elif chain below handles a row; rows matching none of them
        leave the document unchanged.
        """
        if line['id'] == "id":
            return
        if not self.ip or not self.hostname or not self.port:   # host, ip and port
            m = reIpHostColumn.search(line['host'])
            if m:
                self.hostname, self.ip = m.groups()
            self.port = int(line['port'])

        if reProtocol.search(line['id']) and reOffers.search(line['finding']):     # protocols
            # At least one offered protocol marks the scan as successful.
            self.result = True
            m = reProtocol.search(line['id'])
            if m:
                self.protocols.append(line['id'].upper())
        elif reCipherColumnName.search(line['id']):                  # ciphers
            m = reCipherDetails.search(line['finding'])
            if m:
                self.ciphers.append(m.group(1))
        elif reCipherTests.search(line['id']) and reVulnerable.search(line['finding']):                       # cipher tests
            m = reCipherTests.search(line['id'])
            if m:
                self.ciphertests.append(m.group(1))
        elif line['id'] == "order":                                 # server prefers cipher
            self.serverpref.cipher_order = bool(reOk.search(line['finding']))
        elif line['id'] == "order_proto":                           # preferred protocol
            m = reDefaultProtocol.search(line['finding'])
            if m:
                self.serverpref.protocol = m.group(1)
        elif line['id'] == "order_cipher":                          # preferred cipher
            m = reDefaultCipher.search(line['finding'])
            if m:
                self.serverpref.cipher = m.group(1)
        elif line['id'] == "key_size":                              # certificate key size
            m = reKeySize.search(line['finding'])
            if m:
                self.cert.keysize = int(m.group(1))
        elif line['id'] == "algorithm":                             # certificate sign algorithm
            m = reSignAlgorithm.search(line['finding'])
            if m:
                self.cert.signalgo = m.group(1)
        elif line['id'] == "fingerprint":                           # certificate fingerprints
            # One finding may carry several fingerprints; each regex is
            # tried independently.
            m = reFPMD5.search(line['finding'])
            if m:
                self.cert.md5_fingerprint = m.group(1)
            m = reFPSHA1.search(line['finding'])
            if m:
                self.cert.sha1_fingerprint = m.group(1)
            m = reFPSHA256.search(line['finding'])
            if m:
                self.cert.sha256_fingerprint = m.group(1)
        elif line['id'] == "cn":                                    # certificate CN
            m = reCN.search(line['finding'])
            if m:
                self.cert.cn = m.group(1)
        elif line['id'] == "san":                                   # certificate SAN
            m = reSAN.search(line['finding'])
            if m:
                sans = m.group(1)
                # Names are space separated; "--" entries are skipped.
                for san in sans.split(" "):
                    if san != "--":
                        self.cert.san.append(san)
        elif line['id'] == "issuer":                                # certificate issuer
            m = reIssuer.search(line['finding'])
            if m:
                self.cert.issuer = m.group(1)
        elif line['id'] == "ev":                                    # certificate extended validation
            self.cert.ev = bool(reYes.search(line['finding']))
        elif line['id'] == "expiration":                            # certificate expiration
            m = reExpiration.search(line['finding'])
            if m:
                unparsedDate = m.group(1)
                self.cert.expiration = datetime.strptime(unparsedDate, "%Y-%m-%d %H:%M %z")
        elif line['id'] == "ocsp_uri":                              # certificate OCSP URI
            m = reOCSPURI.search(line['finding'])
            if m:
                self.cert.ocsp_uri = m.group(1)
            else:
                # "-" is stored as a placeholder when no URI is found.
                self.cert.ocsp_uri = "-"
        elif line['id'] == "ocsp_stapling":                         # certificate OCSP stapling
            self.cert.ocsp_stapling = not bool(reNotOffered.search(line['finding']))
        elif line['id'] in ("heartbleed", "ccs", "secure_renego",
                            "sec_client_renego", "crime", "breach",
                            "poodle_ssl", "fallback_scsv", "freak", "DROWN",
                            "logjam", "beast",
                            "rc4") and reVulnerable.search(line['finding']):
            self.vulnerabilities.append(line['id'].upper())

    def parseCSV(self, csvfile):
        """Parse a whole CSV file object into this document.

        If ``self.source`` is set and matches ``reDefaultFilename``, the IP,
        port and timestamp are taken from its named groups first. Every row
        of *csvfile* is then passed to ``parseCSVLine``.

        :param csvfile: iterable of CSV text lines (e.g. an open file).
        """
        if self.source:
            m = reDefaultFilename.search(self.source)
            if m:
                self.ip = m.group('ip')
                self.port = int(m.group('port') or 0)
                self.timestamp = datetime.strptime(m.group('datetime'), "%Y%m%d-%H%M")
        csvReader = csv.DictReader(csvfile, fieldnames=("id", "host", "port", "severity", "finding"), delimiter=',', quotechar='"')
        for line in csvReader:
            self.parseCSVLine(line)

    def save(self, **kwargs):
        """Fill in derived fields and persist the document.

        :param kwargs: ``debug`` (truthy) pretty-prints the document before
            saving; all remaining keyword arguments are forwarded to the
            parent ``save``.
        :raises ValueError: if no port was set, i.e. nothing was parsed.
        :return: whatever the parent ``save`` returns.
        """
        # 'debug' is our own flag and must not reach the parent save().
        debug = kwargs.pop('debug', False)

        if not self.timestamp:
            self.timestamp = datetime.now(tz)
        if not self.port:
            raise ValueError("Empty scan result")

        self.svcid = "%s:%d" % (self.ip, int(self.port) or 0)
        if not self.result:
            # Normalise an unset result to an explicit False.
            self.result = False

        if debug:
            pp.pprint(self.to_dict())
        return super().save(**kwargs)