Esempio n. 1
0
def main():
    try:
        with open(REFINERY_CONFIG_FILE) as config_file:
            config = yaml.load(config_file)
    except (IOError, yaml.YAMLError) as exc:
        sys.stderr.write("Error reading {}: {}\n".format(
            REFINERY_CONFIG_FILE, exc))
        raise RuntimeError

    stack_name = config['STACK_NAME'] + 'Storage'
    static_bucket_name = config['S3_BUCKET_NAME_BASE'] + '-static'
    media_bucket_name = config['S3_BUCKET_NAME_BASE'] + '-media'
    template = make_storage_template()

    cloudformation = boto3.client('cloudformation')
    response = cloudformation.create_stack(
        StackName=stack_name,
        TemplateBody=str(template),
        Tags=load_tags(),
        Parameters=[
            {
                'ParameterKey': 'StaticBucketName',
                'ParameterValue': static_bucket_name,
            },
            {
                'ParameterKey': 'MediaBucketName',
                'ParameterValue': media_bucket_name,
            },
        ],
    )
    sys.stdout.write("{}\n".format(json.dumps(response, indent=2)))
def main():
    parser = argparse.ArgumentParser(
        version=VERSION,
        description="""Script to generate AWS CloudFormation JSON templates
        used to create Refinery Platform stacks"""
    )
    parser.add_argument(
        'command', choices=('name', 'dump', 'create')
    )
    args = parser.parse_args()

    config, config_yaml = load_config()

    if args.command == 'name':
        sys.stdout.write("{}\n".format(config['STACK_NAME']))
    elif args.command == 'dump':
        template = make_template(config, config_yaml)
        sys.stdout.write("{}\n".format(template))
    elif args.command == 'create':
        template = make_template(config, config_yaml)
        ensure_s3_bucket(config['S3_LOG_BUCKET'])
        cloudformation = boto3.client('cloudformation')
        response = cloudformation.create_stack(
            StackName=config['STACK_NAME'],
            TemplateBody=str(template),
            Capabilities=['CAPABILITY_IAM'],
            Tags=load_tags()
        )
        sys.stdout.write("{}\n".format(json.dumps(response, indent=2)))
Esempio n. 3
0
 def __init__(self, args, dataset_path, tags_path, trim):
     self.args = args
     self.meta, self.elems = load_dataset(dataset_path)
     self.tags_path = tags_path
     self.pos, self.dep = load_tags(tags_path)
     self.samples = self._create_samples(trim)
     self.tokenizers = None
     self.batch_size = args.batch_size if 'batch_size' in args else 1
     self.pad_token_id = self.tokenizers.pad_token_id \
         if self.tokenizers is not None else 0
Esempio n. 4
0
def main():
    parser = argparse.ArgumentParser(
        version=VERSION,
        description="""Script to generate AWS CloudFormation JSON templates
        used to create Refinery Platform stacks"""
    )
    parser.add_argument(
        'command', choices=('name', 'dump', 'create')
    )
    args = parser.parse_args()

    config, config_yaml = load_config()

    if args.command == 'name':
        sys.stdout.write("{}\n".format(config['STACK_NAME']))
    elif args.command == 'dump':
        template = make_template(config, config_yaml)
        sys.stdout.write("{}\n".format(template))
    elif args.command == 'create':
        template = make_template(config, config_yaml)
        ensure_s3_bucket(config['S3_LOG_BUCKET'])
        cloudformation = boto3.client('cloudformation')
        response = cloudformation.create_stack(
            StackName=config['STACK_NAME'],
            TemplateBody=str(template),
            Capabilities=['CAPABILITY_IAM'],
            Tags=load_tags(),
            Parameters=[
                {
                    'ParameterKey': 'IdentityPoolName',
                    'ParameterValue': config['COGNITO_IDENTITY_POOL_NAME']
                },
                {
                    'ParameterKey': 'DeveloperProviderName',
                    'ParameterValue': config['COGNITO_DEVELOPER_PROVIDER_NAME']
                },
                {
                    'ParameterKey': 'StorageStackName',
                    'ParameterValue': config['STACK_NAME'] + 'Storage'
                },
            ]
        )
        sys.stdout.write("{}\n".format(json.dumps(response, indent=2)))
Esempio n. 5
0
def main():
    try:
        with open(REFINERY_CONFIG_FILE) as config_file:
            config = yaml.load(config_file)
    except (IOError, yaml.YAMLError) as exc:
        sys.stderr.write("Error reading {}: {}\n".format(
            REFINERY_CONFIG_FILE, exc))
        raise RuntimeError

    stack_name = config['STACK_NAME'] + 'Storage'
    template = make_storage_template()

    cloudformation = boto3.client('cloudformation')
    response = cloudformation.create_stack(
        StackName=stack_name,
        TemplateBody=str(template),
        Capabilities=['CAPABILITY_IAM'],
        Tags=load_tags(),
        Parameters=[
            {
                'ParameterKey': 'StaticBucketName',
                'ParameterValue': config['S3_BUCKET_NAME_BASE'] + '-static',
            },
            {
                'ParameterKey': 'MediaBucketName',
                'ParameterValue': config['S3_BUCKET_NAME_BASE'] + '-media',
            },
            {
                'ParameterKey': 'IdentityPoolName',
                'ParameterValue': config['COGNITO_IDENTITY_POOL_NAME']
            },
            {
                'ParameterKey': 'DeveloperProviderName',
                'ParameterValue': config['COGNITO_DEVELOPER_PROVIDER_NAME']
            },
        ],
    )
    sys.stdout.write("{}\n".format(json.dumps(response, indent=2)))
Esempio n. 6
0
def make_template(config, config_yaml):
    """Make a fresh CloudFormation template object and return it"""

    stack_name = config['STACK_NAME']

    # We discover the current git branch/commit so that the deployment script
    # can use it to clone the same commit
    commit = os.popen("""git rev-parse HEAD""").read().rstrip()
    assert commit

    assert "'" not in config['SITE_NAME']

    instance_tags = load_tags()

    # Stack Name is also used for instances.
    instance_tags.append({'Key': 'Name', 'Value': stack_name})

    # This tag is variable and can be specified by
    # template Parameter.
    instance_tags.append({
        'Key': functions.ref('SnapshotSchedulerTag'),
        'Value': 'default'
    })

    config['tags'] = instance_tags

    config_uri = save_s3_config(config)
    sys.stdout.write("Configuration saved to {}\n".format(config_uri))

    tls_rewrite = "false"
    if 'TLS_CERTIFICATE' in config:
        tls_rewrite = "true"

    # The userdata script is executed via CloudInit
    # It's made by concatenating a block of parameter variables,
    # with the bootstrap.sh script, and the aws.sh script
    user_data_script = functions.join(
        "",
        "#!/bin/sh\n",
        "CONFIG_YAML=",
        base64.b64encode(config_yaml),
        "\n",
        "CONFIG_JSON=",
        base64.b64encode(json.dumps(config)),
        "\n",
        "AWS_DEFAULT_REGION=",
        functions.ref("AWS::Region"),
        "\n",
        "RDS_ENDPOINT_ADDRESS=",
        functions.get_att('RDSInstance', 'Endpoint.Address'),
        "\n",
        "RDS_ENDPOINT_PORT=",
        functions.get_att('RDSInstance', 'Endpoint.Port'),
        "\n",
        "RDS_SUPERUSER_PASSWORD="******"\n",
        "RDS_ROLE=",
        config['RDS_ROLE'],
        "\n",
        "ADMIN=",
        config['ADMIN'],
        "\n",
        "DEFAULT_FROM_EMAIL=",
        config['DEFAULT_FROM_EMAIL'],
        "\n",
        "SERVER_EMAIL=",
        config['SERVER_EMAIL'],
        "\n",
        "IAM_SMTP_USER="******"\n",
        "export FACTER_TLS_REWRITE=",
        tls_rewrite,
        "\n",
        "S3_CONFIG_URI=",
        config['S3_CONFIG_URI'],
        "\n",
        "SITE_URL=",
        config['SITE_URL'],
        "\n",
        # May contain spaces, but can't contain "'"
        "SITE_NAME='",
        config['SITE_NAME'],
        "'\n",
        "GIT_BRANCH=",
        commit,
        "\n",
        "\n",
        open('bootstrap.sh').read(),
        open('aws.sh').read())

    cft = core.CloudFormationTemplate(description="Refinery Platform main")

    # This parameter tags the EC2 instances, and is intended to be used
    # with the AWS Reference Implementation EBS Snapshot Scheduler:
    # http://docs.aws.amazon.com/solutions/latest/ebs-snapshot-scheduler/welcome.html
    cft.parameters.add(
        core.Parameter(
            'SnapshotSchedulerTag', 'String', {
                'Default':
                'scheduler:ebs-snapshot',
                'Description':
                "Tag added to EC2 Instances so that "
                "the EBS Snapshot Scheduler will recognise them.",
            }))
    cft.parameters.add(
        core.Parameter(
            'IdentityPoolName', 'String', {
                'Default': 'Refinery Platform',
                'Description': 'Name of Cognito identity pool for S3 uploads',
            }))
    cft.parameters.add(
        core.Parameter(
            'DeveloperProviderName', 'String', {
                'Default':
                'login.refinery',
                'Description':
                '"domain" by which Cognito will refer to users',
                'AllowedPattern':
                '[a-z\-\.]+',
                'ConstraintDescription':
                'must only contain lower case letters, periods, '
                'underscores, and hyphens'
            }))
    cft.parameters.add(
        core.Parameter(
            'StorageStackName', 'String', {
                'Default':
                '${AWS::StackName}Storage',
                'Description':
                'Name of the S3 storage stack for Django '
                'static and media files',
            }))

    rds_properties = {
        "AllocatedStorage": "5",
        "AutoMinorVersionUpgrade": False,
        "BackupRetentionPeriod": "15",
        "CopyTagsToSnapshot": True,
        "DBInstanceClass": "db.t2.small",  # todo:?
        "DBInstanceIdentifier": config['RDS_NAME'],
        "Engine": "postgres",
        "EngineVersion": "9.3.14",
        # "KmsKeyId" ?
        "MasterUsername": "******",
        "MasterUserPassword": config['RDS_SUPERUSER_PASSWORD'],
        "MultiAZ": False,
        "Port": "5432",
        "PubliclyAccessible": False,
        "StorageType": "gp2",
        "Tags": instance_tags,  # todo: Should be different?
        "VPCSecurityGroups":
        [functions.get_att('RDSSecurityGroup', 'GroupId')],
    }

    if 'RDS_SNAPSHOT' in config:
        rds_properties['DBSnapshotIdentifier'] = config['RDS_SNAPSHOT']

    cft.resources.rds_instance = core.Resource(
        'RDSInstance',
        'AWS::RDS::DBInstance',
        core.Properties(rds_properties),
        core.DeletionPolicy("Snapshot"),
    )

    volume_properties = {
        'Encrypted': True,
        'Size': config['DATA_VOLUME_SIZE'],
        'Tags': load_tags(),
        'AvailabilityZone': functions.get_att('WebInstance',
                                              'AvailabilityZone'),
        'VolumeType': config['DATA_VOLUME_TYPE'],
    }

    if 'DATA_SNAPSHOT' in config:
        volume_properties['SnapshotId'] = config['DATA_SNAPSHOT']

    # http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-ec2-ebs-volume.html
    cft.resources.ebs = core.Resource(
        'RefineryData',
        'AWS::EC2::Volume',
        core.Properties(volume_properties),
        core.DeletionPolicy("Snapshot"),
    )

    cft.resources.ec2_instance = core.Resource(
        'WebInstance',
        'AWS::EC2::Instance',
        core.Properties({
            'ImageId':
            'ami-d05e75b8',
            'InstanceType':
            'm3.medium',
            'UserData':
            functions.base64(user_data_script),
            'KeyName':
            config['KEY_NAME'],
            'IamInstanceProfile':
            functions.ref('WebInstanceProfile'),
            'SecurityGroups': [functions.ref("InstanceSecurityGroup")],
            'Tags':
            instance_tags,
        }),
        core.DependsOn(['RDSInstance']),
    )

    cft.resources.instance_profile = core.Resource(
        'WebInstanceProfile', 'AWS::IAM::InstanceProfile',
        core.Properties({
            'Path': '/',
            'Roles': [functions.ref('WebInstanceRole')]
        }))

    cft.resources.web_role = core.Resource(
        'WebInstanceRole',
        'AWS::IAM::Role',
        core.Properties({
            # http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-iam-role.html#cfn-iam-role-templateexamples
            "AssumeRolePolicyDocument": {
                "Version":
                "2012-10-17",
                "Statement": [{
                    "Effect": "Allow",
                    "Principal": {
                        "Service": ["ec2.amazonaws.com"]
                    },
                    "Action": ["sts:AssumeRole"]
                }]
            },
            'ManagedPolicyArns': [
                'arn:aws:iam::aws:policy/AmazonEC2ReadOnlyAccess',
                'arn:aws:iam::aws:policy/AmazonRDSReadOnlyAccess',
                'arn:aws:iam::aws:policy/AmazonS3FullAccess'
            ],
            'Path':
            '/',
            'Policies': [{
                'PolicyName': "CreateAccessKey",
                'PolicyDocument': {
                    "Version":
                    "2012-10-17",
                    "Statement": [{
                        "Effect": "Allow",
                        "Action": ["iam:CreateAccessKey"],
                        "Resource": ["*"]
                    }]
                },
            }, {
                'PolicyName': "CreateSnapshot",
                'PolicyDocument': {
                    "Version":
                    "2012-10-17",
                    "Statement": [{
                        "Effect": "Allow",
                        "Action": ["ec2:CreateSnapshot"],
                        "Resource": ["*"]
                    }]
                }
            }, {
                'PolicyName': "CreateDBSnapshot",
                'PolicyDocument': {
                    "Version":
                    "2012-10-17",
                    "Statement": [{
                        "Effect": "Allow",
                        "Action": ["rds:CreateDBSnapshot"],
                        "Resource": ["*"]
                    }]
                }
            }, {
                'PolicyName': "CreateTags",
                'PolicyDocument': {
                    "Version":
                    "2012-10-17",
                    "Statement": [{
                        "Effect": "Allow",
                        "Action": ["ec2:CreateTags"],
                        "Resource": "*"
                    }]
                }
            }, {
                "PolicyName": "CognitoAccess",
                "PolicyDocument": {
                    "Version":
                    "2012-10-17",
                    "Statement": [{
                        "Effect":
                        "Allow",
                        "Action": [
                            "cognito-identity:ListIdentityPools",
                        ],
                        "Resource":
                        "arn:aws:cognito-identity:*"
                    }, {
                        "Effect":
                        "Allow",
                        "Action": [
                            "cognito-identity:"
                            "GetOpenIdTokenForDeveloperIdentity"
                        ],
                        "Resource": {
                            "Fn::Sub": [
                                "arn:aws:cognito-identity:"
                                "${AWS::Region}:${AWS::AccountId}:"
                                "identitypool/${Pool}", {
                                    "Pool": functions.ref('IdentityPool')
                                }
                            ]
                        }
                    }]
                }
            }]
        }))

    cft.resources.smtp_user = core.Resource(
        'RefinerySMTPUser', 'AWS::IAM::User',
        core.Properties({
            'Policies': [{
                'PolicyName': "SESSendingAccess",
                'PolicyDocument': {
                    "Version":
                    "2012-10-17",
                    "Statement": [{
                        "Effect": "Allow",
                        "Action": "ses:SendRawEmail",
                        "Resource": "*"
                    }]
                }
            }]
        }))

    cft.resources.mount = core.Resource(
        'RefineryVolume', 'AWS::EC2::VolumeAttachment',
        core.Properties({
            'Device': '/dev/xvdr',
            'InstanceId': functions.ref('WebInstance'),
            'VolumeId': functions.ref('RefineryData'),
        }))

    # Security Group for Elastic Load Balancer
    # (public facing).
    cft.resources.elbsg = core.Resource(
        'ELBSecurityGroup',
        'AWS::EC2::SecurityGroup',
        core.Properties({
            'GroupDescription':
            "Refinery ELB",
            # Egress Rule defined via
            # AWS::EC2::SecurityGroupEgress resource,
            # to avoid circularity (below).
            # See http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-ec2-security-group.html # noqa: E501
            'SecurityGroupIngress': [
                {
                    "IpProtocol": "tcp",
                    "FromPort": "80",
                    "ToPort": "80",
                    "CidrIp": "0.0.0.0/0",
                },
                {
                    "IpProtocol": "tcp",
                    "FromPort": "443",
                    "ToPort": "443",
                    "CidrIp": "0.0.0.0/0",
                },
            ],
        }))

    cft.resources.elbegress = core.Resource(
        'ELBEgress', 'AWS::EC2::SecurityGroupEgress',
        core.Properties({
            "GroupId":
            functions.get_att('ELBSecurityGroup', 'GroupId'),
            "IpProtocol":
            "tcp",
            "FromPort":
            "80",
            "ToPort":
            "80",
            "DestinationSecurityGroupId":
            functions.get_att('InstanceSecurityGroup', 'GroupId'),
        }))

    # Security Group for EC2- instance.
    cft.resources.instancesg = core.Resource(
        'InstanceSecurityGroup',
        'AWS::EC2::SecurityGroup',
        core.Properties({
            'GroupDescription':
            "Refinery EC2 Instance",
            'SecurityGroupEgress': [],
            'SecurityGroupIngress': [
                {
                    "IpProtocol": "tcp",
                    "FromPort": "22",
                    "ToPort": "22",
                    "CidrIp": "0.0.0.0/0",
                },
                {
                    "IpProtocol":
                    "tcp",
                    "FromPort":
                    "80",
                    "ToPort":
                    "80",
                    # "CidrIp": "0.0.0.0/0",
                    # Only accept connections from the ELB.
                    "SourceSecurityGroupId":
                    functions.get_att('ELBSecurityGroup', 'GroupId'),
                },
            ],
        }))

    # Security Group for RDS instance.
    cft.resources.rdssg = core.Resource(
        'RDSSecurityGroup',
        'AWS::EC2::SecurityGroup',
        core.Properties({
            'GroupDescription':
            "Refinery RDS",
            'SecurityGroupEgress': [
                # We would like to remove all egress rules here,
                # but you can't do that with this version
                # of CloudFormation.
                # We decided that the hacky workarounds are
                # not worth it.
            ],
            'SecurityGroupIngress': [
                {
                    "IpProtocol":
                    "tcp",
                    "FromPort":
                    "5432",
                    "ToPort":
                    "5432",
                    # Only accept connections from the
                    # Instance Security Group.
                    "SourceSecurityGroupId":
                    functions.get_att('InstanceSecurityGroup', 'GroupId'),
                },
            ],
        }))

    # ELB per
    # http://cfn-pyplates.readthedocs.io/en/latest/examples/options/template.html

    # Insecure, Port 80, HTTP listener
    http_listener = {
        'LoadBalancerPort': '80',
        'Protocol': 'HTTP',
        'InstanceProtocol': 'HTTP',
        'InstancePort': '80',
        'PolicyNames': []
    }
    listeners = [http_listener]

    if 'TLS_CERTIFICATE' in config:
        # Secure, Port 443, HTTPS listener
        https_listener = {
            'LoadBalancerPort': '443',
            'Protocol': 'HTTPS',
            'InstanceProtocol': 'HTTP',
            'InstancePort': '80',
            'PolicyNames': [],
            'SSLCertificateId': config['TLS_CERTIFICATE']
        }
        listeners.append(https_listener)

    cft.resources.elb = core.Resource(
        'LoadBalancer',
        'AWS::ElasticLoadBalancing::LoadBalancer',
        {
            'AccessLoggingPolicy': {
                'EmitInterval': functions.ref('LogInterval'),
                'Enabled': True,
                'S3BucketName': config['S3_LOG_BUCKET'],
                # 'S3BucketPrefix' unused
            },
            'AvailabilityZones':
            [functions.get_att('WebInstance', 'AvailabilityZone')],
            'ConnectionSettings': {
                'IdleTimeout': 1800  # seconds
            },
            'HealthCheck': {
                'HealthyThreshold': '2',
                'Interval': '30',
                'Target': 'HTTP:80/',
                'Timeout': '5',
                'UnhealthyThreshold': '4'
            },
            'Instances': [functions.ref('WebInstance')],
            'LoadBalancerName':
            config['STACK_NAME'],
            'Listeners':
            listeners,
            'SecurityGroups':
            [functions.get_att('ELBSecurityGroup', 'GroupId')],
            'Tags':
            load_tags(),
        })
    cft.parameters.add(
        core.Parameter(
            'LogInterval', 'Number', {
                'Default':
                60,
                'Description':
                "How often, in minutes, the ELB emits its logs to the "
                "configured S3 bucket. The ELB log facility restricts "
                "this to be 5 or 60.",
            }))

    # Cognito Identity Pool for Developer Authenticated Identities Authflow
    # http://docs.aws.amazon.com/cognito/latest/developerguide/authentication-flow.html
    cft.resources.add(
        core.Resource(
            'IdentityPool', 'AWS::Cognito::IdentityPool',
            core.Properties({
                'IdentityPoolName':
                functions.ref('IdentityPoolName'),
                'AllowUnauthenticatedIdentities':
                False,
                'DeveloperProviderName':
                functions.ref('DeveloperProviderName'),
            })))
    cft.resources.add(
        core.Resource(
            'IdentityPoolAuthenticatedRole',
            'AWS::Cognito::IdentityPoolRoleAttachment',
            core.Properties({
                'IdentityPoolId': functions.ref('IdentityPool'),
                'Roles': {
                    'authenticated':
                    functions.get_att('CognitoS3UploadRole', 'Arn'),
                }
            })))
    upload_role_trust_policy = {
        "Version":
        "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Principal": {
                "Federated": "cognito-identity.amazonaws.com"
            },
            "Action": "sts:AssumeRoleWithWebIdentity",
            "Condition": {
                "StringEquals": {
                    "cognito-identity.amazonaws.com:aud":
                    functions.ref('IdentityPool')
                },
                "ForAnyValue:StringLike": {
                    "cognito-identity.amazonaws.com:amr": "authenticated"
                }
            }
        }]
    }
    upload_access_policy = {
        "Version":
        "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Action": ["cognito-identity:*"],
            "Resource": "*"
        }, {
            "Action": ["s3:PutObject", "s3:AbortMultipartUpload"],
            "Effect": "Allow",
            "Resource": {
                "Fn::Sub": [
                    "arn:aws:s3:::${MediaBucket}/uploads/"
                    "${!cognito-identity.amazonaws.com:sub}/*", {
                        "MediaBucket": {
                            "Fn::ImportValue": {
                                "Fn::Sub": "${StorageStackName}Media"
                            }
                        }
                    }
                ]
            }
        }]
    }
    cft.resources.add(
        core.Resource(
            'CognitoS3UploadRole', 'AWS::IAM::Role',
            core.Properties({
                'AssumeRolePolicyDocument':
                upload_role_trust_policy,
                'Policies': [{
                    'PolicyName': 'AuthenticatedS3UploadPolicy',
                    'PolicyDocument': upload_access_policy,
                }]
            })))

    # See http://docs.aws.amazon.com/elasticloadbalancing/latest/classic/enable-access-logs.html#attach-bucket-policy # noqa: E501
    # for full list of region--principal identifiers.
    cft.mappings.region = core.Mapping(
        'Region', {'us-east-1': {
            'ELBPrincipal': '127311923021'
        }})

    cft.resources.log_policy = core.Resource(
        'LogBucketPolicy', 'AWS::S3::BucketPolicy',
        core.Properties({
            'Bucket': config['S3_LOG_BUCKET'],
            'PolicyDocument': {
                'Statement': [{
                    "Action": ["s3:PutObject"],
                    "Effect":
                    "Allow",
                    "Resource":
                    functions.join("", "arn:aws:s3:::",
                                   config['S3_LOG_BUCKET'], "/AWSLogs/",
                                   functions.ref("AWS::AccountId"), "/*"),
                    "Principal": {
                        "AWS": [
                            functions.find_in_map('Region',
                                                  functions.ref("AWS::Region"),
                                                  'ELBPrincipal'),
                        ]
                    }
                }]
            }
        }))

    return cft
Esempio n. 7
0
    
    choice = input("Enter your choice: ")
    
    return choice


if __name__ == "__main__":
    
    #Load data
    try:
        ratings = Data()
        ratings.load('../data/myratings.data')
    except:
        ratings = load_ratings('../data/ml-latest-small/ratings.csv')
    movies = load_movies('../data/ml-latest-small/movies.csv')
    tags = load_tags('../data/ml-latest-small/tags.csv')
    
    os.system('clear')
    print """
#####################################################
####           COMMAND LINE RECOMMENDER          ####
#####################################################

A minimalistic command line recommender system using
SVD decomposistion.
"""
    
    # Create a user
    user_id = str(raw_input("Please enter your username: "))
    user = User(user_id)
    
Esempio n. 8
0
    except IndexError:
        data_path = '/data'
    
    
    #Load data
    ratings = Data()
    if os.path.isfile(data_path + '/myratings.data'):
        ratings.load(data_path + '/myratings.data')
    else:
        try:
            ratings = load_ratings(data_path + '/ratings.csv')
        except IOError:
            raise Exception('Data not found. Please specify it.'
                            % data_path)
    movies = load_movies(data_path + '/movies.csv')
    tags = load_tags(data_path + '/tags.csv')
            
    os.system('clear')
    print """
#####################################################
####           COMMAND LINE RECOMMENDER          ####
#####################################################

A minimalistic command line recommender system using
SVD decomposistion.
"""
    
    #Create and train model
    svd = create_svd_model(ratings)
    
    N = 10000
Esempio n. 9
0
 def checked_tags(self):
     return [
         set(utils.load_tags(filename)) for filename in self.tag_filenames
     ]
Esempio n. 10
0
    elif args.action == 'normalize':

        grammar = read_or_build_grammar(args)
        grammar = normalize(grammar)
        write_grammar(grammar, args.outgrammar)

    elif args.action == 'add-lexicon':

        tags = build_tags(args)
        save_tags(tags, args.json)

    elif args.action == 'parse':

        if args.json:
            tags = load_tags(args.json)
        else:
            tags = build_tags(args)

        sents = get_corpus(args.lexicon, 'unparsed', args.num_sents)
        parses = parse_sents(tags, sents, args.words)
        pretty_print(sents, parses)

    elif args.action == 'analyze':

        test_data = [(filename, load_tags(filename)) for filename in args.test]
        truth_tags = load_tags(args.truth)
        parsed_sents = get_corpus(args.lexicon, 'parsed', args.num_sents)
        unparsed_sents = get_corpus(args.lexicon, 'unparsed', args.num_sents)

        (fields, truth_results,